tar -zcf $(WORKSPACE)/nnfw-package.tar.gz -C $(INSTALL_PATH) lib
tar -zcf $(WORKSPACE)/nnfw-devel-package.tar.gz -C $(INSTALL_PATH) include/nnfw
tar -zcf $(WORKSPACE)/nnfw-plugin-devel-package.tar.gz -C $(INSTALL_PATH) include/onert
- tar -zcf $(WORKSPACE)/nnfw-test-package.tar.gz -C ${INSTALL_PATH} bin test unittest unittest_standalone
+ tar -zcf $(WORKSPACE)/nnfw-test-package.tar.gz -C $(INSTALL_PATH) $(shell ls $(INSTALL_PATH) -I lib -I include)
acl_tar_internal: $(BUILD_FOLDER)
- tar -zcf $(WORKSPACE)/nnfw-acl.tar.gz -C ${OVERLAY_FOLDER} lib
+ tar -zcf $(WORKSPACE)/nnfw-acl.tar.gz -C ${OVERLAY_FOLDER} lib/libarm_compute.so lib/libarm_compute_core.so lib/libarm_compute_graph.so
install_internal_acl:
# Workaround to install acl for test (ignore error when there is no file to copy)
- cp $(OVERLAY_FOLDER)/lib/* $(INSTALL_ALIAS)/lib || true
+ cp $(OVERLAY_FOLDER)/lib/libarm_compute* $(INSTALL_ALIAS)/lib || true
build_test_suite: install_internal install_internal_acl
@echo "packaging test suite"
-file(GLOB_RECURSE SOURCES "src/*.cpp")
+set (SOURCES src/CircleQuantizer.cpp)
add_executable(circle-quantizer "${SOURCES}")
-target_include_directories(circle-quantizer PRIVATE include)
-target_include_directories(circle-quantizer PRIVATE src)
target_link_libraries(circle-quantizer foder)
target_link_libraries(circle-quantizer safemain)
target_link_libraries(circle-quantizer oops)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
-#define __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
- CircleExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
- {
- // NOTHING TO DO
- }
- virtual ~CircleExpContract() = default;
-
-public:
- loco::Graph *graph(void) const final { return nullptr; }
- luci::Module *module(void) const final { return _module; };
-
-public:
- bool store(const char *ptr, const size_t size) const final;
-
-private:
- luci::Module *_module;
- const std::string _filepath;
-};
-
-#endif // __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
* limitations under the License.
*/
-#include "CircleExpContract.h"
-
#include <foder/FileLoader.h>
#include <luci/Importer.h>
#include <luci/CircleOptimizer.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
#include <oops/InternalExn.h>
#include <arser/arser.h>
using Algorithms = luci::CircleOptimizer::Options::Algorithm;
using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+void print_exclusive_options(void)
+{
+ std::cout << "Use only one of the 3 options below." << std::endl;
+ std::cout << " --quantize_dequantize_weights" << std::endl;
+ std::cout << " --quantize_with_minmax" << std::endl;
+ std::cout << " --requantize" << std::endl;
+}
+
void print_version(void)
{
std::cout << "circle-quantizer version " << vconone::get_string() << std::endl;
const std::string qdqw = "--quantize_dequantize_weights";
const std::string qwmm = "--quantize_with_minmax";
+ const std::string rq = "--requantize";
arser::Arser arser("circle-quantizer provides circle model quantization");
"Three arguments required: input_dtype(float32) "
"output_dtype(uint8) granularity(layer, channel)");
+ arser.add_argument(rq)
+ .nargs(2)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .help("Requantize a quantized model. "
+ "Two arguments required: input_dtype(int8) "
+ "output_dtype(uint8)");
+
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
if (arser[qdqw])
{
+ if (arser[qwmm] || arser[rq])
+ {
+ print_exclusive_options();
+ return 255;
+ }
auto values = arser.get<std::vector<std::string>>(qdqw);
if (values.size() != 3)
{
if (arser[qwmm])
{
+ if (arser[qdqw] || arser[rq])
+ {
+ print_exclusive_options();
+ return 255;
+ }
auto values = arser.get<std::vector<std::string>>(qwmm);
if (values.size() != 3)
{
options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
}
+ if (arser[rq])
+ {
+ if (arser[qwmm] || arser[qdqw])
+ {
+ print_exclusive_options();
+ return 255;
+ }
+ auto values = arser.get<std::vector<std::string>>(rq);
+ if (values.size() != 2)
+ {
+ std::cerr << arser;
+ return 255;
+ }
+ options->enable(Algorithms::Requantize);
+
+ options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
+ options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+ }
+
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
// Load model from the file
foder::FileLoader file_loader{input_path};
std::vector<char> model_data = file_loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
const circle::Model *circle_model = circle::GetModel(model_data.data());
if (circle_model == nullptr)
{
// Export to output Circle file
luci::CircleExporter exporter;
- CircleExpContract contract(module.get(), output_path);
+ luci::CircleFileExpContract contract(module.get(), output_path);
if (!exporter.invoke(&contract))
{
## TFLITE RECIPE
+Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv)
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
Add(Net_InstanceNorm_002 PASS fuse_instnorm)
Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
-#define __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-#include <mio/circle/schema_generated.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
- CircleExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
- {
- // NOTHING TO DO
- }
- virtual ~CircleExpContract() = default;
-
-public:
- loco::Graph *graph(void) const final { return nullptr; }
- luci::Module *module(void) const final { return _module; };
-
-public:
- bool store(const char *ptr, const size_t size) const final;
-
-private:
- luci::Module *_module;
- const std::string _filepath;
-};
-
-#endif // __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
* limitations under the License.
*/
-#include "CircleExpContract.h"
-
#include <foder/FileLoader.h>
#include <luci/Importer.h>
#include <luci/CircleOptimizer.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
#include <luci/UserSettings.h>
#include <oops/InternalExn.h>
arser.add_argument("--all").nargs(0).required(false).default_value(false).help(
"Enable all optimize options");
+ arser.add_argument("--fuse_batchnorm_with_tconv")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators to Transposed Convolution operator");
+
arser.add_argument("--fuse_bcq")
.nargs(0)
.required(false)
.nargs(0)
.required(false)
.default_value(false)
- .help("This will turn off operator vaidations. May help input model investigation.");
+ .help("This will turn off operator validations. May help input model investigation.");
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
options->enable(Algorithms::ResolveCustomOpBatchMatMul);
options->enable(Algorithms::ResolveCustomOpMatMul);
}
+ if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
+ options->enable(Algorithms::FuseBatchNormWithTConv);
if (arser.get<bool>("--fuse_bcq"))
options->enable(Algorithms::FuseBCQ);
if (arser.get<bool>("--fuse_instnorm"))
std::cerr << err.what() << std::endl;
return EXIT_FAILURE;
}
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
const circle::Model *circle_model = circle::GetModel(model_data.data());
if (circle_model == nullptr)
{
if (!luci::validate(graph))
{
- std::cerr << "ERROR: Optimized graph is invalid" << std::endl;
- return 255;
+ if (settings->get(luci::UserSettings::Key::DisableValidation))
+ std::cerr << "WARNING: Optimized graph is invalid" << std::endl;
+ else
+ {
+ std::cerr << "ERROR: Optimized graph is invalid" << std::endl;
+ return 255;
+ }
}
}
// Export to output Circle file
luci::CircleExporter exporter;
- CircleExpContract contract(module.get(), output_path);
+ luci::CircleFileExpContract contract(module.get(), output_path);
if (!exporter.invoke(&contract))
{
target_link_libraries(circlechef_circle mio_circle)
target_link_libraries(circlechef_circle stdex)
target_link_libraries(circlechef_circle cwrap)
+target_link_libraries(circlechef_circle souschef)
#include <mio/circle/schema_generated.h>
+#include <souschef/TensorFiller.h>
+
#include <circlechef.pb.h>
#include <map>
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
-class CircleImport
+class CircleImport : public souschef::TensorFiller
{
public:
CircleImport(const circle::Model *model);
std::string opcode_name(const circle::Operator *op) const;
size_t buffer_info(const circle::Tensor *tensor, const uint8_t **buff_data);
- /**
- * @brief This will record the tensor by index, if it needs filler option,
- * such as kernel, bias.
- */
- void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
-
- /**
- * @brief This will store int32 filler values such as reshape information for the tensor
- */
- void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
- {
- _tensor_filler_vint32[tensor_index] = expvalues;
- }
-
- void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
- {
- _tensor_filler_vfloat[tensor_index] = expvalues;
- }
-
- /**
- * @brief This will return true if the tensor by index, needs a filler option.
- */
- bool get_tensor_filler(uint32_t tensor_index)
- {
- auto it = _tensor_filler.find(tensor_index);
- if (it != _tensor_filler.end())
- {
- return it->second;
- }
- return false;
- }
-
- /**
- * @brief This will return true if the tensor by index, needs a int array filler option.
- */
- bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
- {
- auto it = _tensor_filler_vint32.find(tensor_index);
- if (it != _tensor_filler_vint32.end())
- {
- expvalues = it->second;
- return true;
- }
- return false;
- }
-
- bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
- {
- auto it = _tensor_filler_vfloat.find(tensor_index);
- if (it != _tensor_filler_vfloat.end())
- {
- expvalues = it->second;
- return true;
- }
- return false;
- }
-
private:
const CircleSubGraphs_t *_subgraphs{nullptr};
const CircleBuffers_t *_buffers{nullptr};
std::vector<const circle::OperatorCode *> _op_codes{};
std::vector<int32_t> _inputs{};
std::vector<int32_t> _outputs{};
-
- std::map<uint32_t, bool> _tensor_filler{};
- std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
- std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
};
} // namespace circlechef
#include "OpChefs.h"
#include <souschef/Dataset.h>
+#include <souschef/Dims.h>
#include "Log.h"
#include <sstream>
#include <stdexcept>
-namespace
-{
-
using namespace souschef;
-template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
- std::vector<T> res;
- for (const auto &elem : field)
- {
- res.emplace_back(elem);
- }
- return res;
-}
-
-template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
- return Dataset<T>(as_vector<T>(field));
-}
-
-} // namespace
-
-namespace
-{
-
-template <typename T> using Dims = std::vector<T>;
-
-Dims<int32_t> as_dims(const circlechef::TensorShape &shape)
-{
- std::vector<int32_t> res;
-
- for (auto &dim : shape.dim())
- {
- res.emplace_back(static_cast<int32_t>(dim));
- }
-
- return res;
-}
-
-int32_t element_count(const Dims<int32_t> &dims)
-{
- return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
-}
-
-} // namespace
-
namespace
{
_op_map[circle::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[circle::BuiltinOperator_MUL] = make_unique<MulPrinter>();
// There is no Option for NON_MAX_SUPPRESSION_V4
+ // There is no Option for NON_MAX_SUPPRESSION_V5
_op_map[circle::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
_op_map[circle::BuiltinOperator_PACK] = make_unique<PackPrinter>();
// There is no Option for PAD
set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
+# TODO Remove the pinned versions in '--upgrade pip==20.2.1 setuptools==49.3.0'
+# NOTE The version pins are a temporary hotfix for breakage introduced by setuptools 50.x.y
add_custom_command(
OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
- COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade
DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
)
COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade
DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
)
set(INPUT_HDF5_FILE "${RECIPE}${OPT_FORMAT}.input.h5")
set(INPUT_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_HDF5_FILE}")
-
+
set(EXPECTED_HDF5_FILE "${RECIPE}${OPT_FORMAT}.expected.h5")
set(EXPECTED_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${EXPECTED_HDF5_FILE}")
-
+
if(NOT DEFINED NO_TCGEN_${RECIPE})
# Generate input.h5, expected.h5
add_custom_command(OUTPUT ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
COMMENT "Generate ${INPUT_BIN_PATH} and ${EXPECTED_BIN_PATH}"
)
-
+
# Generate test directory
set(TC_DIRECTORY "${NNPKG_PATH}/metadata/tc")
add_custom_command(OUTPUT ${TC_DIRECTORY}
DEPENDS ${NNPKG_PATH}
COMMENT "Generate ${RECIPE} nnpackage test directory"
)
-
+
# Move input hdf5 file to test directory
set(INPUT_NNPKG_PATH "${TC_DIRECTORY}/input.h5")
add_custom_command(OUTPUT ${INPUT_NNPKG_PATH}
DEPENDS ${INPUT_BIN_PATH} ${TC_DIRECTORY}
COMMENT "Move ${INPUT_HDF5_FILE} to nnpackage"
)
-
+
# Move expected hdf5 file to test directory
set(EXPECTED_NNPKG_PATH "${TC_DIRECTORY}/expected.h5")
add_custom_command(OUTPUT ${EXPECTED_NNPKG_PATH}
tcgenerate(Minimum_000)
tcgenerate(NonMaxSuppressionV4_000)
tcgenerate(NonMaxSuppressionV4_001)
+tcgenerate(NonMaxSuppressionV5_000)
+tcgenerate(NonMaxSuppressionV5_001)
tcgenerate(MirrorPad_000)
tcgenerate(Mul_U8_000)
tcgenerate(Neg_000)
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+using namespace locomotiv;
+
+void execute_node(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
{
validate(bias_add, "BiasAdd is nullptr");
annot_domain(bias_add, annot_domain(bias_add->value()));
}
-void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+void execute_node(loco::BiasAdd<loco::Domain::Feature> *bias_add)
{
validate(bias_add, "BiasAdd is nullptr");
annot_domain(bias_add, loco::Domain::Feature);
}
-} // namespace locomotiv
+} // namespace
namespace
{
}
} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+{
+ execute_node(bias_add);
+}
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+{
+ execute_node(bias_add);
+}
+
+} // namespace locomotiv
#include <stdexcept>
#include <cassert>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::BiasEncode *bias_enc)
+using namespace locomotiv;
+
+void execute_node(loco::BiasEncode *bias_enc)
{
auto input_data = annot_data(bias_enc->input());
annot_domain(bias_enc, loco::Domain::Bias);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasEncode *bias_enc) { execute_node(bias_enc); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::ConstGen *constgen)
+using namespace locomotiv;
+
+void execute_node(loco::ConstGen *constgen)
{
uint32_t volume = 1;
annot_domain(constgen, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::ConstGen *constgen) { execute_node(constgen); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Conv2D *conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::Conv2D *conv2d)
{
auto ifm_data = annot_data(conv2d->ifm());
auto ker_data = annot_data(conv2d->ker());
annot_domain(conv2d, loco::Domain::Feature);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Conv2D *conv2d) { execute_node(conv2d); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::DepthwiseConv2D *dw_conv2d)
{
auto ifm_data = annot_data(dw_conv2d->ifm());
auto ker_data = annot_data(dw_conv2d->ker());
annot_domain(dw_conv2d, loco::Domain::Feature);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d) { execute_node(dw_conv2d); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::DepthwiseFilterEncode *enc)
+using namespace locomotiv;
+
+void execute_node(loco::DepthwiseFilterEncode *enc)
{
auto input_data = annot_data(enc->input());
annot_domain(enc, loco::Domain::DepthwiseFilter);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseFilterEncode *enc) { execute_node(enc); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::FeatureDecode *dec)
+using namespace locomotiv;
+
+void execute_node(loco::FeatureDecode *dec)
{
auto input_data = annot_data(dec->input());
annot_domain(dec, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FeatureDecode *dec) { execute_node(dec); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::FilterEncode *enc)
+using namespace locomotiv;
+
+void execute_node(loco::FilterEncode *enc)
{
auto input_data = annot_data(enc->input());
annot_domain(enc, loco::Domain::Filter);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FilterEncode *enc) { execute_node(enc); }
+
} // namespace locomotiv
#include <stdexcept>
#include <cassert>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Forward *forward)
+using namespace locomotiv;
+
+void execute_node(loco::Forward *forward)
{
auto input_data = annot_data(forward->input());
annot_domain(forward, annot_domain(forward->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Forward *forward) { execute_node(forward); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::MatMul *mat_mul)
+using namespace locomotiv;
+
+void execute_node(loco::MatMul *mat_mul)
{
auto lhs_data = annot_data(mat_mul->lhs());
auto rhs_data = annot_data(mat_mul->rhs());
annot_domain(mat_mul, loco::Domain::Matrix);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MatMul *mat_mul) { execute_node(mat_mul); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::MatrixDecode *matrix_dec)
+using namespace locomotiv;
+
+void execute_node(loco::MatrixDecode *matrix_dec)
{
auto input_data = annot_data(matrix_dec->input());
annot_domain(matrix_dec, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MatrixDecode *matrix_dec) { execute_node(matrix_dec); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::MaxPool2D *maxpool2d)
+using namespace locomotiv;
+
+void execute_node(loco::MaxPool2D *maxpool2d)
{
auto ifm_data = annot_data(maxpool2d->ifm());
annot_domain(maxpool2d, loco::Domain::Feature);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MaxPool2D *maxpool2d) { execute_node(maxpool2d); }
+
} // namespace locomotiv
#include <cassert>
#include <stdexcept>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Pull *pull)
+using namespace locomotiv;
+
+void execute_node(loco::Pull *pull)
{
// TODO Remove deprecated code
#if 0
annot_domain(pull, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Pull *pull) { execute_node(pull); }
+
} // namespace locomotiv
#include <stdexcept>
#include <cassert>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Push *push)
+using namespace locomotiv;
+
+void execute_node(loco::Push *push)
{
auto from_data = annot_data(push->from());
annot_domain(push, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Push *push) { execute_node(push); }
+
} // namespace locomotiv
#include <cstring>
#include <vector>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+using namespace locomotiv;
+
+void execute_node(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
{
auto input_data = annot_data(reshape->input());
annot_domain(reshape, annot_domain(reshape->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+{
+ execute_node(reshape);
+}
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorSoftmax *softmax)
+using namespace locomotiv;
+
+void execute_node(loco::TensorSoftmax *softmax)
{
auto input_data = annot_data(softmax->input());
annot_domain(softmax, annot_domain(softmax->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorSoftmax *softmax) { execute_node(softmax); }
+
} // namespace locomotiv
#include <cassert>
#include <stdexcept>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+using namespace locomotiv;
+
+void execute_node(loco::TensorBroadcast *tensor_broadcast)
{
auto input_data = annot_data(tensor_broadcast->input());
annot_domain(tensor_broadcast, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+{
+ execute_node(tensor_broadcast);
+}
+
} // namespace locomotiv
#include <cassert>
#include <stdexcept>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorConcat *tensor_concat)
+using namespace locomotiv;
+
+void execute_node(loco::TensorConcat *tensor_concat)
{
validate(tensor_concat, "TensorConcat is nullptr");
annot_domain(tensor_concat, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorConcat *tensor_concat) { execute_node(tensor_concat); }
+
} // namespace locomotiv
using nncc::core::ADT::tensor::LexicalLayout;
using nncc::core::ADT::tensor::make_buffer;
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorConstantPad *pad)
+using namespace locomotiv;
+
+void execute_node(loco::TensorConstantPad *pad)
{
validate(pad, "TensorConstantPad is nullptr");
annot_domain(pad, annot_domain(pad->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorConstantPad *pad) { execute_node(pad); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorReduce *node)
+using namespace locomotiv;
+
+void execute_node(loco::TensorReduce *node)
{
auto input_data = annot_data(node->input());
validate(input_data, "Input not ready");
annot_domain(node, annot_domain(node->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorReduce *node) { execute_node(node); }
+
} // namespace locomotiv
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::TransposedConv2D *tr_conv2d)
{
auto ifm_data = annot_data(tr_conv2d->ifm());
auto ker_data = annot_data(tr_conv2d->ker());
annot_domain(tr_conv2d, loco::Domain::Feature);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d) { execute_node(tr_conv2d); }
+
} // namespace locomotiv
-nnas_find_package(TensorFlowSource EXACT 2.1.0 QUIET)
-nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.1.0 QUIET)
-nnas_find_package(TensorFlowEigenSource EXACT 2.1.0 QUIET)
+nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET)
if (NOT TensorFlowSource_FOUND)
message(STATUS "Skipping luci-interpreter: TensorFlow not found")
return()
endif ()
+if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+endif ()
+
add_subdirectory(core)
add_subdirectory(kernels)
add_subdirectory(loader)
/*dimension_data=*/{3}, /*output_data=*/{3, 1});
}
+TEST(ArgMaxTest, UnsupportedType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
+ 1, 2, 7, 8, 1, 9, 7, 3,
+ });
+ Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3});
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ ArgMaxParams params{};
+ params.output_type = DataType::U8;
+ ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
void AveragePool2D::configure()
{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input Tensor and Output Tensor Type must be same");
+ }
+ if (input()->shape().num_dims() != 4)
+ {
+ throw std::runtime_error("Input Tensor Shape must be 4-D");
+ }
const Shape &input_shape = input()->shape();
const int32_t batches = input_shape.dim(0);
computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
_padding_width =
computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
-
+ if (input()->element_type() == DataType::U8)
+ {
+ if (input()->scale() != output()->scale() || input()->zero_point() != output()->zero_point())
+ {
+ throw std::runtime_error(
+ "Quantization param for Input and output must be same(scale or zero-point)");
+ }
+ }
output()->resize({batches, output_height, output_width, depth});
}
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}
+TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
+{
+ Shape input_shape{1, 3, 5};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AveragePool2DTest, In_Out_Type_NEG)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AveragePool2DTest, Quant_Param_NEG)
+{
+ std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
+ Tensor input_tensor{
+ DataType::U8, {1, 2, 4, 1}, {{quant_param1.first}, {quant_param1.second}}, ""};
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+
+ std::vector<uint8_t> quant_input = quantize<uint8_t>(
+ {
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ },
+ quant_param1.first, quant_param1.second);
+ input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
Reshape.cpp
Reverse.h
Reverse.cpp
+ Rsqrt.h
+ Rsqrt.cpp
Slice.h
Slice.cpp
Softmax.h
Split.cpp
StridedSlice.h
StridedSlice.cpp
+ Sqrt.h
+ Sqrt.cpp
Squeeze.h
Squeeze.cpp
+ Tanh.h
+ Tanh.cpp
Transpose.h
Transpose.cpp
TransposeConv.h
Unpack.h
Unpack.cpp)
-list(APPEND SOURCES Utils.h Utils.cpp)
+list(APPEND SOURCES Utils.h Utils.cpp ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
add_library(luci_interpreter_kernels STATIC ${SOURCES})
set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
"${TensorFlowGEMMLowpSource_DIR}"
"${TensorFlowEigenSource_DIR}"
"${TensorFlowSource_DIR}")
Pad.test.cpp
Reshape.test.cpp
Reverse.test.cpp
+ Rsqrt.test.cpp
Slice.test.cpp
Softmax.test.cpp
SpaceToDepth.test.cpp
Split.test.cpp
StridedSlice.test.cpp
+ Sqrt.test.cpp
Squeeze.test.cpp
+ Tanh.test.cpp
Transpose.test.cpp
TransposeConv.test.cpp
Unpack.test.cpp)
// We only support (1) and (3) for now.
if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
{
- assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
}
else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
{
- assert(bias() == nullptr || bias()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
}
else
{
throw std::runtime_error("Unsupported type.");
}
- assert(output()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
const Shape &input_shape = input()->shape();
const Shape &filter_shape = filter()->shape();
- assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
const int32_t batches = input_shape.dim(0);
const int32_t input_height = input_shape.dim(1);
const int32_t output_depth = filter_shape.dim(0);
const int32_t filter_height = filter_shape.dim(1);
const int32_t filter_width = filter_shape.dim(2);
- assert(filter_shape.dim(3) == input_shape.dim(3));
+ LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3));
- assert(bias() == nullptr ||
- (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == output_depth));
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+ bias()->shape().dim(0) == output_depth));
const int32_t output_height =
computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<int32_t> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Bias_Type_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<uint8_t> bias_data{1, 2};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Bias_Data_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{3};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Input_Shape_NEG)
+{
+ Shape input_shape{1, 4, 6, 1};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
assert(input()->element_type() == output()->element_type());
if (input()->element_type() == DataType::U8)
{
- _q_alpha = static_cast<uint8_t>(std::max<float>(
- std::numeric_limits<uint8_t>::min(),
- std::min<float>(std::numeric_limits<uint8_t>::max(),
- std::round(input()->zero_point() + (params().alpha / input()->scale())))));
- double real_multiplier = input()->scale() * input()->scale() / output()->scale();
- quantizeMultiplierSmallerThanOneExp(real_multiplier, &_output_multiplier, &_output_shift);
+ double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
}
output()->resize(input()->shape());
}
{
tflite::LeakyReluParams op_params{};
op_params.input_offset = input()->zero_point();
- op_params.alpha_offset = input()->zero_point();
op_params.output_offset = output()->zero_point();
-
- op_params.output_multiplier = _output_multiplier;
- op_params.output_shift = _output_shift;
+ op_params.output_multiplier_alpha = _output_multiplier_alpha;
+ op_params.output_shift_alpha = _output_shift_alpha;
+ op_params.output_multiplier_identity = _output_multiplier_identity;
+ op_params.output_shift_identity = _output_shift_identity;
tflite::reference_ops::QuantizeLeakyRelu(
- op_params, _q_alpha, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
}
} // namespace kernels
void evalQuantized() const;
private:
- uint8_t _q_alpha = 0;
- int32_t _output_multiplier = 0;
- int _output_shift = 0;
+ int32_t _output_multiplier_alpha = 0;
+ int _output_shift_alpha = 0;
+ int32_t _output_multiplier_identity = 0;
+ int _output_shift_identity = 0;
};
} // namespace kernels
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
#include <stdexcept>
if (need_broadcast)
{
- tflite::reference_ops::BroadcastMul4DSlow(
+ tflite::optimized_ops::BroadcastMul4DSlow(
params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
- tflite::reference_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
getTensorShape(input2()), getTensorData<float>(input2()),
getTensorShape(output()), getTensorData<float>(output()));
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Rsqrt kernel: element-wise reciprocal square root, out = 1 / sqrt(in).
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+  // Input and output must share the same element type; the output shape
+  // mirrors the input shape (element-wise op).
+  if (input()->element_type() != output()->element_type())
+  {
+    throw std::runtime_error("Input/output tensor data type mismatch.");
+  }
+  output()->resize(input()->shape());
+}
+
+void Rsqrt::execute() const
+{
+  switch (input()->element_type())
+  {
+    // Only FLOAT32 is implemented; other types (e.g. quantized) throw.
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Rsqrt::evalFloat() const
+{
+  // Element-wise 1/sqrt over the flat tensor buffer.
+  const float *input_data = getTensorData<float>(input());
+  float *output_data = getTensorData<float>(output());
+  const int num_elements = getTensorShape(input()).FlatSize();
+  for (int i = 0; i < num_elements; ++i)
+  {
+    output_data[i] = 1.f / std::sqrt(input_data[i]);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H
+#define LUCI_INTERPRETER_KERNELS_RSQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing the element-wise Rsqrt (1/sqrt) operation.
+class Rsqrt : public Kernel
+{
+public:
+  Rsqrt(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // FLOAT32 evaluation path; other element types are rejected in execute().
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test helper: runs the Rsqrt kernel over float input_data and verifies both
+// the produced values (approximately) and the output shape.
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
+  input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Rsqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(RsqrtTest, SimpleRsqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 5, 4, 8, 2, //
+ 6, 7.5, 9, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.44721360, 0.5, 0.35355339, 0.70710678, //
+ 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+ });
+}
+
+TEST(RsqrtTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(RsqrtTest, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Sqrt kernel: element-wise square root, out = sqrt(in).
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Sqrt::configure()
+{
+  // Input and output must share the same element type; the output shape
+  // mirrors the input shape (element-wise op).
+  if (input()->element_type() != output()->element_type())
+  {
+    throw std::runtime_error("Input/output tensor data type mismatch.");
+  }
+  output()->resize(input()->shape());
+}
+
+void Sqrt::execute() const
+{
+  switch (input()->element_type())
+  {
+    // Only FLOAT32 is implemented; other types (e.g. quantized) throw.
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Sqrt::evalFloat() const
+{
+  // Element-wise sqrt over the flat tensor buffer.
+  const float *input_data = getTensorData<float>(input());
+  float *output_data = getTensorData<float>(output());
+  const int num_elements = getTensorShape(input()).FlatSize();
+  for (int i = 0; i < num_elements; ++i)
+  {
+    output_data[i] = std::sqrt(input_data[i]);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H
+#define LUCI_INTERPRETER_KERNELS_SQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing the element-wise Sqrt operation.
+class Sqrt : public Kernel
+{
+public:
+  Sqrt(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // FLOAT32 evaluation path; other element types are rejected in execute().
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQRT_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test helper: runs the Sqrt kernel over float input_data and verifies both
+// the produced values (approximately) and the output shape.
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
+  input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Sqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(SqrtTest, SimpleSqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, 8, 2, 4, //
+ 3, 7, 10, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.0, 2.8284271, 1.4142136, 2, //
+ 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+ });
+}
+
+TEST(SqrtTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Copy-paste fix: this file tests the Sqrt kernel, so the suite must be
+// SqrtTest (was AddTest), matching the sibling tests above and the Rsqrt
+// test file's naming.
+TEST(SqrtTest, Invalid_Input_Type_NEG)
+{
+  // S64 input passes configure() (types match) but must be rejected at
+  // execute(), which only supports FLOAT32.
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+  Sqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Tanh::configure()
+{
+  // Runtime check instead of assert so the type mismatch is also reported in
+  // release builds, consistent with the LUCI_INTERPRETER_CHECK conversions
+  // done for the other kernels in this change (kernels/Utils.h is included).
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  if (input()->element_type() == DataType::U8)
+  {
+    // Quantized path evaluates tanh through a precomputed 256-entry LUT.
+    populateLookupTable();
+  }
+  output()->resize(input()->shape());
+}
+
+void Tanh::execute() const
+{
+  switch (input()->element_type())
+  {
+    // Float path delegates to the TFLite reference op.
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    // U8 path uses the lookup table built in configure().
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// FLOAT32 evaluation: straight delegation to the TFLite reference tanh.
+void Tanh::evalFloat() const
+{
+  tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Tanh::evalQuantized() const
+{
+  // Quantized tanh is a pure table lookup: every possible uint8 input value
+  // was mapped to its quantized output in populateLookupTable().
+  const int flat_size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+  const uint8_t *in_data = getTensorData<uint8_t>(input());
+  uint8_t *out_data = getTensorData<uint8_t>(output());
+  for (int idx = 0; idx < flat_size; ++idx)
+  {
+    out_data[idx] = getTableValue(in_data[idx]);
+  }
+}
+
+// Precomputes the quantized tanh table: for each of the 256 possible uint8
+// input values, dequantize -> tanh -> requantize, clamped to the uint8 range.
+void Tanh::populateLookupTable()
+{
+  const auto input_scale = static_cast<double>(input()->scale());
+  const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+  const auto output_scale = static_cast<double>(output()->scale());
+  const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+  const float inverse_scale = 1 / output_scale;
+  int32_t maxval = std::numeric_limits<uint8_t>::max();
+  int32_t minval = std::numeric_limits<uint8_t>::min();
+  for (int32_t val = minval; val <= maxval; ++val)
+  {
+    const float dequantized = input_scale * (val - input_zero_point);
+    const float transformed = std::tanh(dequantized);
+    const float rescaled = std::round(transformed * inverse_scale);
+    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+    // Table is indexed by the raw quantized input value `val`; the stored
+    // entry is the clamped quantized output.
+    setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+                  static_cast<uint8_t>(val));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TANH_H
+#define LUCI_INTERPRETER_KERNELS_TANH_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing the Tanh operation. The float path delegates to the
+// TFLite reference op; the U8 path evaluates through a 256-entry lookup
+// table built once in configure().
+class Tanh : public Kernel
+{
+public:
+  Tanh(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  void evalQuantized() const;
+  // Fills _table with the quantized tanh of every possible uint8 input.
+  void populateLookupTable();
+  void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; };
+  uint8_t getTableValue(uint8_t idx) const { return _table[idx]; };
+
+private:
+  // Lookup table: index = quantized input value, entry = quantized output.
+  uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TANH_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(TanhTest, Float)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, -0.9999877, 0.9640275, 0.999329, //
+ 0.99505475, -0.9640275, 1, 0.7615941, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+}
+
+TEST(TanhTest, Uint8)
+{
+ float kMin = -1;
+ float kMax = 127.f / 128.f;
+ float kTanhTolerance = 2 * (1. / 256);
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor{
+ DataType::U8, {2, 6, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ std::vector<uint8_t> quantize_input =
+ quantize<uint8_t>(input_data, input_quant_param.first, input_quant_param.second);
+ input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
+ EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+ output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(ref_output_data, kTanhTolerance)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
{
TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
- Tensor *output, const TransposeConvParams ¶ms)
- : KernelWithParams<TransposeConvParams>({output_shape, filter, input}, {output}, params)
+ const Tensor *bias, Tensor *output, const TransposeConvParams ¶ms)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
{
}
op_params.output_multiplier = _output_multiplier;
tflite::reference_ops::TransposeConv(
op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(output()), getTensorData<float>(output()),
- tflite::RuntimeShape(), (float *)nullptr);
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(),
+ (float *)nullptr);
}
void TransposeConv::evalQuantized() const
tflite::reference_ops::TransposeConv(
op_params, getTensorShape(input()), getTensorData<uint8>(input()), getTensorShape(filter()),
- getTensorData<uint8>(filter()), getTensorShape(output()), getTensorData<uint8>(output()),
- tflite::RuntimeShape(), (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
+ getTensorData<uint8>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8>(output()), tflite::RuntimeShape(),
+ (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
}
} // namespace kernels
{
public:
TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
- Tensor *output, const TransposeConvParams ¶ms);
+ const Tensor *bias, Tensor *output, const TransposeConvParams ¶ms);
const Tensor *output_shape() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
const Tensor *input() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
Tensor *output() const { return _outputs[0]; }
void configure() override;
using namespace testing;
-template <typename T>
+template <typename T, typename B>
void Check(std::initializer_list<int32_t> output_shape_shape,
std::initializer_list<int32_t> weight_shape,
std::initializer_list<int32_t> input_data_shape,
- std::initializer_list<int32_t> output_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
- std::initializer_list<T> input_data_data, std::initializer_list<T> output_data,
- luci::Padding padding, int32_t stride_height, int32_t stride_width,
- DataType element_type)
+ std::initializer_list<T> input_data_data, std::initializer_list<B> bias_data,
+ std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
+ int32_t stride_width, DataType element_type)
{
Tensor output_shape_tensor{element_type, output_shape_shape, {}, ""};
output_shape_tensor.writeData(output_shape_data.begin(), output_shape_data.size() * sizeof(T));
params.stride_height = stride_height;
params.stride_width = stride_width;
- TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &output_tensor,
- params);
- kernel.configure();
- kernel.execute();
-
+ if (bias_data.size() != 0)
+ {
+ Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data);
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
+ &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+ }
+ else
+ {
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
+ &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+ }
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
}
TEST(TransposeConvTest, FloatSimple)
{
- Check<float>(
+ Check<float, float>(
/*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
- /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
/*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
/*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*bias_data=*/{},
/*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
/*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
getElementType<float>());
TEST(TransposeConvTest, FloatTwoFiltersTest)
{
- Check<float>(
+ Check<float, float>(
/*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
- /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
/*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
/*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*bias_data=*/{},
/*output_data=*/{184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968,
3352, 3652, 2760},
/*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
SUCCEED();
}
+TEST(TransposeConvTest, SimpleBiasTest)
+{
+ Check<float, float>(
+ /*outputShape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+ /*input_shape=*/{1, 2, 2, 1},
+ /*bias_shape=*/{2}, /*output_shape=*/{1, 5, 5, 2}, /*outputShape_data=*/{1, 5, 5, 2},
+ /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+ /*input_data=*/{1, 2, 3, 4},
+ /*bias_data=*/{3, 4},
+ /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
+ 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
+ 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+ /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2,
+ getElementType<float>());
+
+ SUCCEED();
+}
+
// TODO Uint8Simple
// Implement GetDequantizedOutput Function.
// Create Test for Uint8 Case
namespace kernels
{
+#define LUCI_INTERPRETER_CHECK(cond) \
+ if (!(cond)) \
+ throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + "(" + \
+ std::string(#cond) + ") was not true.");
+
inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
int32_t filter_size, int32_t out_size)
{
#include "kernels/Pad.h"
#include "kernels/Reshape.h"
#include "kernels/Reverse.h"
+#include "kernels/Rsqrt.h"
#include "kernels/Slice.h"
#include "kernels/Softmax.h"
#include "kernels/SpaceToDepth.h"
#include "kernels/Split.h"
#include "kernels/StridedSlice.h"
+#include "kernels/Sqrt.h"
#include "kernels/Squeeze.h"
+#include "kernels/Tanh.h"
#include "kernels/Unpack.h"
#include "kernels/Transpose.h"
#include "kernels/TransposeConv.h"
return std::make_unique<kernels::Reverse>(input, axes, output);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleRsqrt *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node)
{
assert(node->arity() == 3);
return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqrt *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Sqrt>(input, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTanh *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Tanh>(input, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
{
assert(node->arity() == 2);
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node)
{
- assert(node->arity() == 3);
+ assert(node->arity() == 4);
const Tensor *input_sizes = getInputTensor(node->inputSizes());
const Tensor *filter = getInputTensor(node->filter());
const Tensor *out_backprop = getInputTensor(node->outBackprop());
+ const Tensor *bias = getOptionalInputTensor(node->bias());
Tensor *output = getOutputTensor(node);
params.stride_height = node->stride()->h();
params.stride_width = node->stride()->w();
- return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, output,
+ return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
params);
}
std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleRsqrt *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSqrt *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleTanh *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleUnpack *node) override;
#include <kernels/Pad.h>
#include <kernels/Reshape.h>
#include <kernels/Reverse.h>
+#include <kernels/Rsqrt.h>
#include <kernels/Slice.h>
#include <kernels/Softmax.h>
#include <kernels/SpaceToDepth.h>
#include <kernels/Split.h>
+#include <kernels/Sqrt.h>
#include <kernels/Squeeze.h>
#include <kernels/StridedSlice.h>
+#include <kernels/Tanh.h>
#include <kernels/Transpose.h>
#include <kernels/TransposeConv.h>
#include <kernels/Unpack.h>
checkTensor(kernel->output(), op);
}
+TEST_F(KernelBuilderTest, Rsqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRsqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Rsqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Slice)
{
auto *input = createInputNode();
checkTensor(kernel->output(1), output2);
}
+TEST_F(KernelBuilderTest, Sqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Sqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Squeeze)
{
auto *input = createInputNode();
EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
}
+TEST_F(KernelBuilderTest, Tanh)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleTanh>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Tanh>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Transpose)
{
auto *input = createInputNode();
auto *output_shape = createInputNode();
auto *filter = createInputNode();
auto *input = createInputNode();
+ auto *bias = createInputNode();
auto *op = createNode<luci::CircleTransposeConv>();
op->inputSizes(output_shape);
op->filter(filter);
op->outBackprop(input);
+ op->bias(bias);
op->padding(luci::Padding::SAME);
op->stride()->h(11);
checkTensor(kernel->filter(), filter);
checkTensor(kernel->input(), input);
checkTensor(kernel->output(), op);
+ checkTensor(kernel->bias(), bias);
EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
set(SRCS_EVAL_TESTER
src/EvalTester.cpp
- src/CircleExpContract.h
- src/CircleExpContract.cpp
)
add_executable(luci_eval_tester ${SRCS_EVAL_TESTER})
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
-#define __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
- CircleExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
- {
- // NOTHING TO DO
- }
- virtual ~CircleExpContract() = default;
-
-public:
- loco::Graph *graph(void) const final { return nullptr; }
- luci::Module *module(void) const final { return _module; };
-
-public:
- bool store(const char *ptr, const size_t size) const final;
-
-private:
- luci::Module *_module;
- const std::string _filepath;
-};
-
-#endif // __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
* limitations under the License.
*/
-#include "CircleExpContract.h"
-
#include <luci/Importer.h>
#include <luci_interpreter/Interpreter.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
#include <cstdlib>
#include <fstream>
// Export to a Circle file
luci::CircleExporter exporter;
- CircleExpContract contract(initial_module.get(), intermediate_filename);
+
+ luci::CircleFileExpContract contract(initial_module.get(), intermediate_filename);
+
if (!exporter.invoke(&contract))
{
std::cerr << "ERROR: Failed to export '" << intermediate_filename << "'" << std::endl;
* limitations under the License.
*/
-#ifndef __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
-#define __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
+#ifndef __LUCI_CIRCLEFILEEXPCONTRACT_H__
+#define __LUCI_CIRCLEFILEEXPCONTRACT_H__
#include <loco.h>
#include <luci/CircleExporter.h>
#include <luci/IR/Module.h>
+#include <oops/InternalExn.h>
#include <string>
+#include <fstream>
+#include <iostream>
-namespace record_minmax
+namespace luci
{
-struct CircleExpContract : public luci::CircleExporter::Contract
+struct CircleFileExpContract : public luci::CircleExporter::Contract
{
public:
- CircleExpContract(luci::Module *module, const std::string &filename)
+ CircleFileExpContract(luci::Module *module, const std::string &filename)
: _module(module), _filepath(filename)
{
// NOTHING TO DO
}
- virtual ~CircleExpContract() = default;
+ virtual ~CircleFileExpContract() = default;
public:
loco::Graph *graph(void) const final { return nullptr; }
- luci::Module *module(void) const final { return _module; };
+ luci::Module *module(void) const final { return _module; }
public:
- bool store(const char *ptr, const size_t size) const final;
+ bool store(const char *ptr, const size_t size) const final
+ {
+ if (!ptr)
+ INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+
+ std::ofstream fs(_filepath, std::ofstream::binary);
+ fs.write(ptr, size);
+
+ return fs.good();
+ }
private:
luci::Module *_module;
const std::string _filepath;
};
-} // namespace record_minmax
+} // namespace luci
-#endif // __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
+#endif // __LUCI_CIRCLEFILEEXPCONTRACT_H__
using namespace luci;
+struct ExportContext
+{
+ FlatBufferBuilder &builder;
+ SerializedModelData &md;
+ SerializedGraphData &gd;
+};
+
+/**
+ * @brief Exports CircleMaxPool2D or CircleAveragePool2D
+ *
+ * @note CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
+ */
+template <class CirclePool2D>
+void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
+{
+ LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
+ builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
+ builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+ "Should be L2Pool, MaxPool or AvgPool");
+ LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+
+ circle::Padding padding = getOpPadding(node->padding());
+
+ auto options = CreatePool2DOptions(ctx.builder, padding, node->stride()->w(), node->stride()->h(),
+ node->filter()->w(), node->filter()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_Pool2DOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+/**
+ * @brief export simple nodes
+ */
+void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop,
+ circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
+{
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ inputs_vec.push_back(get_tensor_index(node->arg(i)));
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, options_offset);
+ ctx.gd._operators.push_back(op_offset);
+}
+
+/**
+ * @brief export simple nodes having void options
+ */
+void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop)
+{
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ inputs_vec.push_back(get_tensor_index(node->arg(i)));
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleAddN *node)
+{
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ inputs_vec.push_back(get_tensor_index(node->inputs(i)));
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateAddNOptions(ctx.builder);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_AddNOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleCast *node)
+{
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+
+ flatbuffers::Offset<Operator> op_offset;
+ if (node->out_data_type() != loco::DataType::Unknown)
+ {
+ auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()),
+ to_circle_tensortype(node->out_data_type()));
+ op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_CastOptions, options.Union());
+ }
+ else
+ {
+ op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
+ }
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleConcatenation *node)
+{
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ inputs_vec.push_back(get_tensor_index(node->values(i)));
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateConcatenationOptions(ctx.builder, node->axis(),
+ to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_ConcatenationOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleCustom *node)
+{
+ auto custom_outputs = loco::succs(node);
+
+ uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t index = 0; index < node->numInputs(); index++)
+ {
+ inputs_vec.push_back(get_tensor_index(node->inputs(index)));
+ }
+ for (uint32_t index = 0; index < custom_outputs.size(); index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : custom_outputs)
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
+ if (custom_out->index() == static_cast<int32_t>(index))
+ {
+ outputs_vec.push_back(get_tensor_index(custom_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid Custom output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
+ std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
+ node->custom_options().end()};
+ circle_custom_options = ctx.builder.CreateVector(custom_options_vec);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void>(), circle_custom_options);
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleIf *node)
+{
+ auto if_outs = loco::succs(node);
+ assert(if_outs.size() == node->output_count());
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec;
+
+ inputs_vec.push_back(get_tensor_index(node->cond()));
+ for (uint32_t idx = 0; idx < node->input_count(); ++idx)
+ inputs_vec.push_back(get_tensor_index(node->input(idx)));
+
+ for (uint32_t idx = 0; idx < node->output_count(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : if_outs)
+ {
+ auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
+ if (if_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(if_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid CircleIf output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateIfOptions(ctx.builder, node->then_branch(), node->else_branch());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_IfOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV4 *node)
+{
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 2);
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4,
+ node->op_version());
+ std::vector<int32_t> inputs_vec{
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()),
+ };
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
+ if (nms_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(nms_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateNonMaxSuppressionV4Options(ctx.builder);
+ auto op_offset =
+ CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV5 *node)
+{
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 3);
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5,
+ node->op_version());
+ std::vector<int32_t> inputs_vec{
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
+ };
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
+ if (nms_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(nms_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateNonMaxSuppressionV5Options(ctx.builder);
+ auto op_offset =
+ CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleReverseV2 *node)
+{
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateReverseV2Options(ctx.builder);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_ReverseV2Options, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleSplit *node)
+{
+ auto split_outs = loco::succs(node);
+ assert(int32_t(split_outs.size()) == node->num_split());
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
+ // NOTE BuiltinOperator_SPLIT input is placed at second position
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
+ get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < node->num_split(); index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : split_outs)
+ {
+ auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
+ if (split_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(split_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid Split output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateSplitOptions(ctx.builder, node->num_split());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_SplitOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleSplitV *node)
+{
+ auto split_outs = loco::succs(node);
+ assert(int32_t(split_outs.size()) == node->num_split());
+
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+ get_tensor_index(node->size_splits()),
+ get_tensor_index(node->split_dim())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < node->num_split(); index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : split_outs)
+ {
+ auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
+ if (split_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(split_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid SplitV output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateSplitVOptions(ctx.builder, node->num_split());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_SplitVOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleTopKV2 *node)
+{
+ auto topkv2_outs = loco::succs(node);
+ int outs_count = int32_t(topkv2_outs.size());
+ assert(outs_count == 2);
+
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < outs_count; index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : topkv2_outs)
+ {
+ auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
+ if (topkv2_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(topkv2_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid TopKV2 output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateTopKV2Options(ctx.builder);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_TopKV2Options, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleUnique *node)
+{
+ auto unique_outs = loco::succs(node);
+ assert(int32_t(unique_outs.size()) == 2);
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < 2; index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : unique_outs)
+ {
+ auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
+ if (unique_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(unique_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid Unique output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateUniqueOptions(ctx.builder, to_circle_tensortype(node->idx_out_type()));
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_UniqueOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleUnpack *node)
+{
+ LOGGER(l);
+ auto settings = luci::UserSettings::settings();
+
+ auto unpack_outs = loco::succs(node);
+ // NOTE real models may not use all of the outputs
+ if (static_cast<int32_t>(unpack_outs.size()) != node->num())
+ {
+ if (settings->get(luci::UserSettings::Key::DisableValidation))
+ {
+ WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
+ }
+ else
+ assert(false);
+ }
+
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < node->num(); index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : unpack_outs)
+ {
+ auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
+ if (unpack_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(unpack_out));
+ found = true;
+ break;
+ }
+ }
+ // NOTE real models may not use all of the outputs
+ if (!found)
+ {
+ if (settings->get(luci::UserSettings::Key::DisableValidation))
+ {
+ WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
+ }
+ else
+ assert(false);
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateUnpackOptions(ctx.builder, node->num(), node->axis());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_UnpackOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleWhile *node)
+{
+ auto while_outs = loco::succs(node);
+ assert(while_outs.size() == node->output_count());
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t idx = 0; idx < node->input_count(); ++idx)
+ inputs_vec.push_back(get_tensor_index(node->input(idx)));
+
+ for (uint32_t idx = 0; idx < node->output_count(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : while_outs)
+ {
+ auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
+ if (while_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(while_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid CircleWhile output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateWhileOptions(ctx.builder, node->cond_branch(), node->body_branch());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_WhileOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
class OperationExporter final : public luci::CircleNodeMutableVisitor<void>,
public loco::CanonicalNodeMutableVisitor<void>
{
public:
- OperationExporter(FlatBufferBuilder &fbb, SerializedModelData &m, SerializedGraphData &g)
- : builder{fbb}, md{m}, gd{g}
+ OperationExporter(ExportContext &ctx) : _ctx{ctx}
{
// DO NOTHING
}
void visit(luci::CircleMul *) final;
void visit(luci::CircleNeg *) final;
void visit(luci::CircleNonMaxSuppressionV4 *) final;
+ void visit(luci::CircleNonMaxSuppressionV5 *) final;
void visit(luci::CircleNotEqual *) final;
void visit(luci::CircleOneHot *) final;
void visit(luci::CirclePack *) final;
void visit(luci::CirclePad *) final;
+ void visit(luci::CirclePadV2 *) final;
void visit(luci::CirclePow *) final;
void visit(luci::CirclePRelu *) final;
void visit(luci::CircleRange *) final;
void visit(luci::CircleCustomOut *) final {}
void visit(luci::CircleIfOut *) final {}
void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
+ void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
void visit(luci::CircleSplitOut *) final {}
void visit(luci::CircleSplitVOut *) final {}
void visit(luci::CircleTopKV2Out *) final {}
private:
/**
- * @brief Exports CircleMaxPool2D or CircleAveragePool2D
- *
- * @note CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
- */
- template <class CirclePool2D>
- void export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op);
-
- /**
* @brief export simple nodes
*/
void export_simple(loco::Node *node, circle::BuiltinOperator bop, circle::BuiltinOptions bot,
void export_simple(loco::Node *node, circle::BuiltinOperator bop);
private:
- FlatBufferBuilder &builder;
- SerializedModelData &md;
- SerializedGraphData &gd;
+ ExportContext &_ctx;
};
-template <class CirclePool2D>
-void OperationExporter::export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op)
-{
- LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
- builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
- "Should be L2Pool, MaxPool or AvgPool");
- LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
-
- uint32_t op_idx = md.registerBuiltinOpcode(builtin_op, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
-
- circle::Padding padding = getOpPadding(node->padding());
-
- auto options = CreatePool2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
- node->filter()->w(), node->filter()->h(),
- to_circle_actfunc(node->fusedActivationFunction()));
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_Pool2DOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
-
void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop,
circle::BuiltinOptions bot,
flatbuffers::Offset<void> options_offset)
{
- uint32_t op_idx =
- md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(node)};
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->arg(i)));
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, bot, options_offset);
- gd._operators.push_back(op_offset);
+ export_node(_ctx, node, bop, bot, options_offset);
}
void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop)
{
- uint32_t op_idx =
- md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->arg(i)));
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
- gd._operators.push_back(op_offset);
+ export_node(_ctx, node, bop);
}
void OperationExporter::visit(luci::CircleAbs *node)
{
export_simple(node, circle::BuiltinOperator_ABS, circle::BuiltinOptions_AbsOptions,
- CreateAbsOptions(builder).Union());
+ CreateAbsOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleAdd *node)
{
export_simple(
node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
- CreateAddOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
-void OperationExporter::visit(luci::CircleAddN *node)
-{
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->inputs(i)));
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateAddNOptions(builder);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_AddNOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleAddN *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleArgMax *node)
{
- export_simple(node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
- CreateArgMaxOptions(builder, to_circle_tensortype(node->output_type())).Union());
+ export_simple(
+ node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
+ CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
}
void OperationExporter::visit(luci::CircleArgMin *node)
{
- export_simple(node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
- CreateArgMinOptions(builder, to_circle_tensortype(node->output_type())).Union());
+ export_simple(
+ node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
+ CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
}
void OperationExporter::visit(luci::CircleAveragePool2D *node)
{
- export_pool_2d<luci::CircleAveragePool2D>(node, circle::BuiltinOperator_AVERAGE_POOL_2D);
+ export_pool_2d<luci::CircleAveragePool2D>(_ctx, node, circle::BuiltinOperator_AVERAGE_POOL_2D);
}
void OperationExporter::visit(luci::CircleBatchMatMul *node)
{
export_simple(node, circle::BuiltinOperator_BATCH_MATMUL,
circle::BuiltinOptions_BatchMatMulOptions,
- CreateBatchMatMulOptions(builder, node->adj_x(), node->adj_y()).Union());
+ CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union());
}
-void OperationExporter::visit(luci::CircleCast *node)
-{
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
-
- flatbuffers::Offset<Operator> op_offset;
- if (node->out_data_type() != loco::DataType::Unknown)
- {
- auto options = CreateCastOptions(builder, to_circle_tensortype(node->in_data_type()),
- to_circle_tensortype(node->out_data_type()));
- op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_CastOptions,
- options.Union());
- }
- else
- {
- op_offset = CreateOperator(builder, op_idx, inputs, outputs);
- }
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleCast *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleCeil *node)
{
export_simple(node, circle::BuiltinOperator_CEIL);
}
-void OperationExporter::visit(luci::CircleConcatenation *node)
-{
- uint32_t op_idx =
- md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
- for (uint32_t i = 0; i < node->numValues(); ++i)
- inputs_vec.push_back(get_tensor_index(node->values(i)));
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateConcatenationOptions(builder, node->axis(),
- to_circle_actfunc(node->fusedActivationFunction()));
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_ConcatenationOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleConcatenation *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleBatchToSpaceND *node)
{
export_simple(node, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
circle::BuiltinOptions_BatchToSpaceNDOptions,
- CreateBatchToSpaceNDOptions(builder).Union());
+ CreateBatchToSpaceNDOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleConv2D *node)
{
export_simple(node, circle::BuiltinOperator_CONV_2D, circle::BuiltinOptions_Conv2DOptions,
- CreateConv2DOptions(builder, getOpPadding(node->padding()), node->stride()->w(),
- node->stride()->h(),
+ CreateConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
+ node->stride()->w(), node->stride()->h(),
to_circle_actfunc(node->fusedActivationFunction()),
node->dilation()->w(), node->dilation()->h())
.Union());
void OperationExporter::visit(luci::CircleCos *node)
{
export_simple(node, circle::BuiltinOperator_COS, circle::BuiltinOptions_CosOptions,
- CreateCosOptions(builder).Union());
+ CreateCosOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleCustom *node)
-{
- auto custom_outputs = loco::succs(node);
-
- uint32_t op_idx = md.registerCustomOpcode(node->custom_code());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t index = 0; index < node->numInputs(); index++)
- {
- inputs_vec.push_back(get_tensor_index(node->inputs(index)));
- }
- for (uint32_t index = 0; index < custom_outputs.size(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : custom_outputs)
- {
- auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
- if (custom_out->index() == static_cast<int32_t>(index))
- {
- outputs_vec.push_back(get_tensor_index(custom_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Custom output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
- std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
- node->custom_options().end()};
- circle_custom_options = builder.CreateVector(custom_options_vec);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
- flatbuffers::Offset<void>(), circle_custom_options);
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleCustom *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleDepthToSpace *node)
{
export_simple(node, circle::BuiltinOperator_DEPTH_TO_SPACE,
circle::BuiltinOptions_DepthToSpaceOptions,
- CreateDepthToSpaceOptions(builder, node->block_size()).Union());
+ CreateDepthToSpaceOptions(_ctx.builder, node->block_size()).Union());
}
void OperationExporter::visit(luci::CircleDepthwiseConv2D *node)
{
export_simple(node, circle::BuiltinOperator_DEPTHWISE_CONV_2D,
circle::BuiltinOptions_DepthwiseConv2DOptions,
- CreateDepthwiseConv2DOptions(builder, getOpPadding(node->padding()),
+ CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
node->stride()->w(), node->stride()->h(),
node->depthMultiplier(),
to_circle_actfunc(node->fusedActivationFunction()),
{
export_simple(
node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
- CreateDivOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleElu *node)
void OperationExporter::visit(luci::CircleEqual *node)
{
export_simple(node, circle::BuiltinOperator_EQUAL, circle::BuiltinOptions_EqualOptions,
- CreateEqualOptions(builder).Union());
+ CreateEqualOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleExp *node)
{
export_simple(node, circle::BuiltinOperator_EXP, circle::BuiltinOptions_ExpOptions,
- CreateExpOptions(builder).Union());
+ CreateExpOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleExpandDims *node)
{
export_simple(node, circle::BuiltinOperator_EXPAND_DIMS, circle::BuiltinOptions_ExpandDimsOptions,
- CreateExpandDimsOptions(builder).Union());
+ CreateExpandDimsOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleFill *node)
{
export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions,
- CreateFillOptions(builder).Union());
+ CreateFillOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleFloor *node)
void OperationExporter::visit(luci::CircleFloorDiv *node)
{
export_simple(node, circle::BuiltinOperator_FLOOR_DIV, circle::BuiltinOptions_FloorDivOptions,
- CreateFloorDivOptions(builder).Union());
+ CreateFloorDivOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleFloorMod *node)
{
export_simple(node, circle::BuiltinOperator_FLOOR_MOD, circle::BuiltinOptions_FloorModOptions,
- CreateFloorModOptions(builder).Union());
+ CreateFloorModOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleFullyConnected *node)
{
export_simple(
node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
- CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()))
+ CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
.Union());
}
void OperationExporter::visit(luci::CircleGather *node)
{
export_simple(node, circle::BuiltinOperator_GATHER, circle::BuiltinOptions_GatherOptions,
- CreateGatherOptions(builder, node->axis()).Union());
+ CreateGatherOptions(_ctx.builder, node->axis()).Union());
}
void OperationExporter::visit(luci::CircleGatherNd *node)
{
export_simple(node, circle::BuiltinOperator_GATHER_ND, circle::BuiltinOptions_GatherNdOptions,
- CreateGatherNdOptions(builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleGreater *node)
-{
- export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions,
- CreateGreaterOptions(builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleGreaterEqual *node)
-{
- export_simple(node, circle::BuiltinOperator_GREATER_EQUAL,
- circle::BuiltinOptions_GreaterEqualOptions,
- CreateGreaterEqualOptions(builder).Union());
+ CreateGatherNdOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleIf *node)
-{
- auto if_outs = loco::succs(node);
- assert(if_outs.size() == node->output_count());
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- inputs_vec.push_back(get_tensor_index(node->cond()));
- for (uint32_t idx = 0; idx < node->input_count(); ++idx)
- inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
- for (uint32_t idx = 0; idx < node->output_count(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : if_outs)
- {
- auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
- if (if_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(if_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid CircleIf output");
- }
- }
+void OperationExporter::visit(luci::CircleGreater *node)
+{
+ export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions,
+ CreateGreaterOptions(_ctx.builder).Union());
+}
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateIfOptions(builder, node->then_branch(), node->else_branch());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_IfOptions, options.Union());
- gd._operators.push_back(op_offset);
+void OperationExporter::visit(luci::CircleGreaterEqual *node)
+{
+ export_simple(node, circle::BuiltinOperator_GREATER_EQUAL,
+ circle::BuiltinOptions_GreaterEqualOptions,
+ CreateGreaterEqualOptions(_ctx.builder).Union());
}
+void OperationExporter::visit(luci::CircleIf *node) { export_node(_ctx, node); }
+
void OperationExporter::visit(luci::CircleL2Normalize *node)
{
export_simple(
node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
- CreateL2NormOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union());
}
void OperationExporter::visit(luci::CircleL2Pool2D *node)
{
- export_pool_2d<luci::CircleL2Pool2D>(node, circle::BuiltinOperator_L2_POOL_2D);
+ export_pool_2d<luci::CircleL2Pool2D>(_ctx, node, circle::BuiltinOperator_L2_POOL_2D);
}
void OperationExporter::visit(luci::CircleLeakyRelu *node)
{
export_simple(node, circle::BuiltinOperator_LEAKY_RELU, circle::BuiltinOptions_LeakyReluOptions,
- CreateLeakyReluOptions(builder, node->alpha()).Union());
+ CreateLeakyReluOptions(_ctx.builder, node->alpha()).Union());
}
void OperationExporter::visit(luci::CircleLess *node)
{
export_simple(node, circle::BuiltinOperator_LESS, circle::BuiltinOptions_LessOptions,
- CreateLessOptions(builder).Union());
+ CreateLessOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLessEqual *node)
{
export_simple(node, circle::BuiltinOperator_LESS_EQUAL, circle::BuiltinOptions_LessEqualOptions,
- CreateLessEqualOptions(builder).Union());
+ CreateLessEqualOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLocalResponseNormalization *node)
{
export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
circle::BuiltinOptions_LocalResponseNormalizationOptions,
- CreateLocalResponseNormalizationOptions(builder, node->radius(), node->bias(),
+ CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(),
node->alpha(), node->beta())
.Union());
}
void OperationExporter::visit(luci::CircleLogicalAnd *node)
{
export_simple(node, circle::BuiltinOperator_LOGICAL_AND, circle::BuiltinOptions_LogicalAndOptions,
- CreateLogicalAndOptions(builder).Union());
+ CreateLogicalAndOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLogicalNot *node)
{
export_simple(node, circle::BuiltinOperator_LOGICAL_NOT, circle::BuiltinOptions_LogicalNotOptions,
- CreateLogicalNotOptions(builder).Union());
+ CreateLogicalNotOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLogicalOr *node)
{
export_simple(node, circle::BuiltinOperator_LOGICAL_OR, circle::BuiltinOptions_LogicalOrOptions,
- CreateLogicalOrOptions(builder).Union());
+ CreateLogicalOrOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLogistic *node)
void OperationExporter::visit(luci::CircleLogSoftmax *node)
{
export_simple(node, circle::BuiltinOperator_LOG_SOFTMAX, circle::BuiltinOptions_LogSoftmaxOptions,
- CreateLogSoftmaxOptions(builder).Union());
+ CreateLogSoftmaxOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMatrixDiag *node)
{
export_simple(node, circle::BuiltinOperator_MATRIX_DIAG, circle::BuiltinOptions_MatrixDiagOptions,
- CreateMatrixDiagOptions(builder).Union());
+ CreateMatrixDiagOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMatrixSetDiag *node)
{
export_simple(node, circle::BuiltinOperator_MATRIX_SET_DIAG,
circle::BuiltinOptions_MatrixSetDiagOptions,
- CreateMatrixSetDiagOptions(builder).Union());
+ CreateMatrixSetDiagOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMaximum *node)
{
export_simple(node, circle::BuiltinOperator_MAXIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
- CreateMaximumMinimumOptions(builder).Union());
+ CreateMaximumMinimumOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMaxPool2D *node)
{
- export_pool_2d<luci::CircleMaxPool2D>(node, circle::BuiltinOperator_MAX_POOL_2D);
+ export_pool_2d<luci::CircleMaxPool2D>(_ctx, node, circle::BuiltinOperator_MAX_POOL_2D);
}
void OperationExporter::visit(luci::CircleMean *node)
{
export_simple(node, circle::BuiltinOperator_MEAN, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleMinimum *node)
{
export_simple(node, circle::BuiltinOperator_MINIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
- CreateMaximumMinimumOptions(builder).Union());
+ CreateMaximumMinimumOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMirrorPad *node)
{
- export_simple(node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
- CreateMirrorPadOptions(builder, to_circle_mirrorpadmode(node->mode())).Union());
+ export_simple(
+ node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
+ CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
}
void OperationExporter::visit(luci::CircleMul *node)
{
export_simple(
node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
- CreateMulOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleNeg *node)
{
export_simple(node, circle::BuiltinOperator_NEG, circle::BuiltinOptions_NegOptions,
- CreateNegOptions(builder).Union());
+ CreateNegOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node)
-{
- auto nms_outs = loco::succs(node);
- assert(nms_outs.size() == 2);
-
- uint32_t op_idx =
- md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4, node->op_version());
- std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()),
- };
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : nms_outs)
- {
- auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
- if (nms_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(nms_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
- }
- }
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node) { export_node(_ctx, node); }
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateNonMaxSuppressionV4Options(builder);
- auto op_offset =
- CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV5 *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleNotEqual *node)
{
export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions,
- CreateNotEqualOptions(builder).Union());
+ CreateNotEqualOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleOneHot *node)
{
export_simple(node, circle::BuiltinOperator_ONE_HOT, circle::BuiltinOptions_OneHotOptions,
- CreateOneHotOptions(builder, node->axis()).Union());
+ CreateOneHotOptions(_ctx.builder, node->axis()).Union());
}
void OperationExporter::visit(luci::CirclePack *node)
{
export_simple(node, circle::BuiltinOperator_PACK, circle::BuiltinOptions_PackOptions,
- CreatePackOptions(builder, node->values_count(), node->axis()).Union());
+ CreatePackOptions(_ctx.builder, node->values_count(), node->axis()).Union());
}
void OperationExporter::visit(luci::CirclePad *node)
{
export_simple(node, circle::BuiltinOperator_PAD, circle::BuiltinOptions_PadOptions,
- CreatePadOptions(builder).Union());
+ CreatePadOptions(_ctx.builder).Union());
+}
+
+void OperationExporter::visit(luci::CirclePadV2 *node)
+{
+ export_simple(node, circle::BuiltinOperator_PADV2, circle::BuiltinOptions_PadV2Options,
+ CreatePadV2Options(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CirclePow *node)
{
export_simple(node, circle::BuiltinOperator_POW, circle::BuiltinOptions_PowOptions,
- CreatePowOptions(builder).Union());
+ CreatePowOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CirclePRelu *node)
void OperationExporter::visit(luci::CircleRange *node)
{
export_simple(node, circle::BuiltinOperator_RANGE, circle::BuiltinOptions_RangeOptions,
- CreateRangeOptions(builder).Union());
+ CreateRangeOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleRank *node)
{
export_simple(node, circle::BuiltinOperator_RANK, circle::BuiltinOptions_RankOptions,
- CreateRankOptions(builder).Union());
+ CreateRankOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleReduceAny *node)
{
export_simple(node, circle::BuiltinOperator_REDUCE_ANY, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleReduceMax *node)
{
export_simple(node, circle::BuiltinOperator_REDUCE_MAX, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleReduceMin *node)
{
export_simple(node, circle::BuiltinOperator_REDUCE_MIN, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleReduceProd *node)
{
export_simple(node, circle::BuiltinOperator_REDUCE_PROD, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleRelu *node)
void OperationExporter::visit(luci::CircleReshape *node)
{
- auto new_shape = builder.CreateVector<int32_t>(
+ auto new_shape = _ctx.builder.CreateVector<int32_t>(
node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions,
- CreateReshapeOptions(builder, new_shape).Union());
+ CreateReshapeOptions(_ctx.builder, new_shape).Union());
}
void OperationExporter::visit(luci::CircleResizeBilinear *node)
{
export_simple(
node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
- CreateResizeBilinearOptions(builder, node->align_corners(), node->half_pixel_centers())
+ CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
.Union());
}
{
export_simple(node, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
circle::BuiltinOptions_ResizeNearestNeighborOptions,
- CreateResizeNearestNeighborOptions(builder, node->align_corners()).Union());
+ CreateResizeNearestNeighborOptions(_ctx.builder, node->align_corners()).Union());
}
void OperationExporter::visit(luci::CircleReverseSequence *node)
{
export_simple(
node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
- CreateReverseSequenceOptions(builder, node->seq_axis(), node->batch_axis()).Union());
+ CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
}
-void OperationExporter::visit(luci::CircleReverseV2 *node)
-{
- uint32_t op_idx =
- md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateReverseV2Options(builder);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_ReverseSequenceOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleRound *node)
{
void OperationExporter::visit(luci::CircleScatterNd *node)
{
export_simple(node, circle::BuiltinOperator_SCATTER_ND, circle::BuiltinOptions_ScatterNdOptions,
- CreateScatterNdOptions(builder).Union());
+ CreateScatterNdOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSegmentSum *node)
{
export_simple(node, circle::BuiltinOperator_SEGMENT_SUM, circle::BuiltinOptions_SegmentSumOptions,
- CreateSegmentSumOptions(builder).Union());
+ CreateSegmentSumOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSelect *node)
{
export_simple(node, circle::BuiltinOperator_SELECT, circle::BuiltinOptions_SelectOptions,
- CreateSelectOptions(builder).Union());
+ CreateSelectOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSelectV2 *node)
{
export_simple(node, circle::BuiltinOperator_SELECT_V2, circle::BuiltinOptions_SelectV2Options,
- CreateSelectV2Options(builder).Union());
+ CreateSelectV2Options(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleShape *node)
{
export_simple(node, circle::BuiltinOperator_SHAPE, circle::BuiltinOptions_ShapeOptions,
- CreateShapeOptions(builder, to_circle_tensortype(node->out_type())).Union());
+ CreateShapeOptions(_ctx.builder, to_circle_tensortype(node->out_type())).Union());
}
void OperationExporter::visit(luci::CircleSin *node)
void OperationExporter::visit(luci::CircleSlice *node)
{
export_simple(node, circle::BuiltinOperator_SLICE, circle::BuiltinOptions_SliceOptions,
- CreateSliceOptions(builder).Union());
+ CreateSliceOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSoftmax *node)
{
export_simple(node, circle::BuiltinOperator_SOFTMAX, circle::BuiltinOptions_SoftmaxOptions,
- CreateSoftmaxOptions(builder, node->beta()).Union());
+ CreateSoftmaxOptions(_ctx.builder, node->beta()).Union());
}
void OperationExporter::visit(luci::CircleSpaceToBatchND *node)
{
export_simple(node, circle::BuiltinOperator_SPACE_TO_BATCH_ND,
circle::BuiltinOptions_SpaceToBatchNDOptions,
- CreateSpaceToBatchNDOptions(builder).Union());
+ CreateSpaceToBatchNDOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSpaceToDepth *node)
{
export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH,
circle::BuiltinOptions_SpaceToDepthOptions,
- CreateSpaceToDepthOptions(builder, node->block_size()).Union());
+ CreateSpaceToDepthOptions(_ctx.builder, node->block_size()).Union());
}
void OperationExporter::visit(luci::CircleSparseToDense *node)
{
export_simple(node, circle::BuiltinOperator_SPARSE_TO_DENSE,
circle::BuiltinOptions_SparseToDenseOptions,
- CreateSparseToDenseOptions(builder, node->validate_indices()).Union());
-}
-
-void OperationExporter::visit(luci::CircleSplit *node)
-{
- auto split_outs = loco::succs(node);
- assert(int32_t(split_outs.size()) == node->num_split());
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
- // NOTE BuiltinOperator_SPLIT input is placed at second position
- std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
- get_tensor_index(node->input())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num_split(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : split_outs)
- {
- auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
- if (split_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(split_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Split output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateSplitOptions(builder, node->num_split());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_SplitOptions, options.Union());
- gd._operators.push_back(op_offset);
+ CreateSparseToDenseOptions(_ctx.builder, node->validate_indices()).Union());
}
-void OperationExporter::visit(luci::CircleSplitV *node)
-{
- auto split_outs = loco::succs(node);
- assert(int32_t(split_outs.size()) == node->num_split());
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
- get_tensor_index(node->size_splits()),
- get_tensor_index(node->split_dim())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num_split(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : split_outs)
- {
- auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
- if (split_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(split_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid SplitV output");
- }
- }
+void OperationExporter::visit(luci::CircleSplit *node) { export_node(_ctx, node); }
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateSplitVOptions(builder, node->num_split());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_SplitVOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleSplitV *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleSqrt *node)
{
void OperationExporter::visit(luci::CircleSquare *node)
{
export_simple(node, circle::BuiltinOperator_SQUARE, circle::BuiltinOptions_SquareOptions,
- CreateSquareOptions(builder).Union());
+ CreateSquareOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSquaredDifference *node)
{
export_simple(node, circle::BuiltinOperator_SQUARED_DIFFERENCE,
circle::BuiltinOptions_SquaredDifferenceOptions,
- CreateSquaredDifferenceOptions(builder).Union());
+ CreateSquaredDifferenceOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSqueeze *node)
{
- auto squeeze_dims = builder.CreateVector<int32_t>(node->squeeze_dims());
+ auto squeeze_dims = _ctx.builder.CreateVector<int32_t>(node->squeeze_dims());
export_simple(node, circle::BuiltinOperator_SQUEEZE, circle::BuiltinOptions_SqueezeOptions,
- CreateSqueezeOptions(builder, squeeze_dims).Union());
+ CreateSqueezeOptions(_ctx.builder, squeeze_dims).Union());
}
void OperationExporter::visit(luci::CircleStridedSlice *node)
{
export_simple(node, circle::BuiltinOperator_STRIDED_SLICE,
circle::BuiltinOptions_StridedSliceOptions,
- CreateStridedSliceOptions(builder, node->begin_mask(), node->end_mask(),
+ CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(),
node->ellipsis_mask(), node->new_axis_mask(),
node->shrink_axis_mask())
.Union());
{
export_simple(
node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
- CreateSubOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleSum *node)
{
export_simple(node, circle::BuiltinOperator_SUM, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleTanh *node)
void OperationExporter::visit(luci::CircleTile *node)
{
export_simple(node, circle::BuiltinOperator_TILE, circle::BuiltinOptions_TileOptions,
- CreateTileOptions(builder).Union());
+ CreateTileOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleTopKV2 *node)
-{
- auto topkv2_outs = loco::succs(node);
- int outs_count = int32_t(topkv2_outs.size());
- assert(outs_count == 2);
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < outs_count; index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : topkv2_outs)
- {
- auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
- if (topkv2_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(topkv2_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid TopKV2 output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateTopKV2Options(builder);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_TopKV2Options, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleTopKV2 *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleTranspose *node)
{
export_simple(node, circle::BuiltinOperator_TRANSPOSE, circle::BuiltinOptions_TransposeOptions,
- CreateTransposeOptions(builder).Union());
+ CreateTransposeOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleTransposeConv *node)
{
export_simple(node, circle::BuiltinOperator_TRANSPOSE_CONV,
circle::BuiltinOptions_TransposeConvOptions,
- CreateTransposeConvOptions(builder, getOpPadding(node->padding()),
+ CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()),
node->stride()->w(), node->stride()->h())
.Union());
}
-void OperationExporter::visit(luci::CircleUnique *node)
-{
- auto unique_outs = loco::succs(node);
- assert(int32_t(unique_outs.size()) == 2);
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
-
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < 2; index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : unique_outs)
- {
- auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
- if (unique_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(unique_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Unique output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateUniqueOptions(builder, to_circle_tensortype(node->idx_out_type()));
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_UniqueOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
-
-void OperationExporter::visit(luci::CircleUnpack *node)
-{
- LOGGER(l);
- auto settings = luci::UserSettings::settings();
-
- auto unpack_outs = loco::succs(node);
- // NOTE real models may not use all of the outputs
- if (static_cast<int32_t>(unpack_outs.size()) != node->num())
- {
- if (settings->get(luci::UserSettings::Key::DisableValidation))
- {
- WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
- }
- else
- assert(false);
- }
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : unpack_outs)
- {
- auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
- if (unpack_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(unpack_out));
- found = true;
- break;
- }
- }
- // NOTE real models may not use all of the outputs
- if (!found)
- {
- if (settings->get(luci::UserSettings::Key::DisableValidation))
- {
- WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
- }
- else
- assert(false);
- }
- }
+void OperationExporter::visit(luci::CircleUnique *node) { export_node(_ctx, node); }
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateUnpackOptions(builder, node->num(), node->axis());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_UnpackOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleUnpack *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleWhere *node)
{
export_simple(node, circle::BuiltinOperator_WHERE, circle::BuiltinOptions_WhereOptions,
- CreateWhereOptions(builder).Union());
+ CreateWhereOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleWhile *node)
-{
- auto while_outs = loco::succs(node);
- assert(while_outs.size() == node->output_count());
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < node->input_count(); ++idx)
- inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
- for (uint32_t idx = 0; idx < node->output_count(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : while_outs)
- {
- auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
- if (while_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(while_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid CircleWhile output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateWhileOptions(builder, node->cond_branch(), node->body_branch());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_WhileOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleWhile *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleZerosLike *node)
{
export_simple(node, circle::BuiltinOperator_ZEROS_LIKE, circle::BuiltinOptions_ZerosLikeOptions,
- CreateZerosLikeOptions(builder).Union());
+ CreateZerosLikeOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleBCQFullyConnected *node)
{
export_simple(node, circle::BuiltinOperator_BCQ_FULLY_CONNECTED,
circle::BuiltinOptions_BCQFullyConnectedOptions,
- CreateBCQFullyConnectedOptions(builder, node->weights_hidden_size(),
+ CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(),
to_circle_actfunc(node->fusedActivationFunction()))
.Union());
}
void OperationExporter::visit(luci::CircleBCQGather *node)
{
- export_simple(node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
- CreateBCQGatherOptions(builder, node->input_hidden_size(), node->axis()).Union());
+ export_simple(
+ node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
+ CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
}
void OperationExporter::visit(luci::CircleInstanceNorm *node)
{
export_simple(node, circle::BuiltinOperator_INSTANCE_NORM,
circle::BuiltinOptions_InstanceNormOptions,
- CreateInstanceNormOptions(builder, node->epsilon(),
+ CreateInstanceNormOptions(_ctx.builder, node->epsilon(),
to_circle_actfunc(node->fusedActivationFunction()))
.Union());
}
{
if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
{
- OperationExporter exporter{builder, md, gd};
+ ExportContext ctx{builder, md, gd};
+ OperationExporter exporter{ctx};
circle_node->accept(&exporter);
}
else
#include "Nodes/CircleMul.h"
#include "Nodes/CircleNeg.h"
#include "Nodes/CircleNonMaxSuppressionV4.h"
+#include "Nodes/CircleNonMaxSuppressionV5.h"
#include "Nodes/CircleNotEqual.h"
#include "Nodes/CircleOneHot.h"
#include "Nodes/CirclePack.h"
#include "Nodes/CirclePad.h"
+#include "Nodes/CirclePadV2.h"
#include "Nodes/CirclePow.h"
#include "Nodes/CirclePRelu.h"
#include "Nodes/CircleRange.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+namespace luci
+{
+
+class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderBase
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+ void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
+#define __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CirclePadV2GraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
CIRCLE_NODE(MUL, CircleMulGraphBuilder); // 18
CIRCLE_NODE(NEG, CircleNegGraphBuilder); // 59
CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4GraphBuilder); // 120,
+ CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5GraphBuilder); // 121,
CIRCLE_NODE(NOT_EQUAL, CircleNotEqualGraphBuilder); // 72
CIRCLE_NODE(ONE_HOT, CircleOneHotGraphBuilder); // 85
CIRCLE_NODE(PACK, CirclePackGraphBuilder); // 83
CIRCLE_NODE(PAD, CirclePadGraphBuilder); // 34
+ CIRCLE_NODE(PADV2, CirclePadV2GraphBuilder); // 60
CIRCLE_NODE(POW, CirclePowGraphBuilder); // 78
CIRCLE_NODE(PRELU, CirclePReluGraphBuilder); // 54,
CIRCLE_NODE(RANGE, CircleRangeGraphBuilder); // 96
// BuiltinOperator_DELEGATE = 51,
// BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
// BuiltinOperator_ARG_MAX = 56,
- // BuiltinOperator_PADV2 = 60,
// BuiltinOperator_FAKE_QUANT = 80,
// BuiltinOperator_QUANTIZE = 114,
// BuiltinOperator_HARD_SWISH = 117,
- // BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
// BuiltinOperator_DENSIFY = 124,
}
#include <luci/IR/Nodes/CircleBatchToSpaceND.h>
-#include <loco.h>
+#include "ValidateHelpers.h"
-#include <cassert>
+#include <loco.h>
namespace luci
{
bool CircleBatchToSpaceNDGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 3)
- return false;
-
- // input 1 and 2 should have INT32/INT64 type
- const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs.at(1));
- switch (tensor_1->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
- const auto &tensor_2 = tensors.at(inputs.at(2));
- switch (tensor_2->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- // Only support input shape dimension 3 and 4 only
- const auto &tensor_0 = tensors.at(inputs.at(0));
- const auto t_0_s = tensor_0->shape.size();
- if (t_0_s != 3 && t_0_s != 4)
- return false;
-
- // TODO check input shape
-
- return true;
+ return validate_batch_space_nd(args);
}
CircleNode *CircleBatchToSpaceNDGraphBuilder::build_node(const circle::OperatorT &,
copy_data<loco::DataType::U8>(buffer, num_elements, const_node);
break;
+ case loco::DataType::S8:
+ copy_data<loco::DataType::S8>(buffer, num_elements, const_node);
+ break;
+
case loco::DataType::S16:
copy_data<loco::DataType::S16>(buffer, num_elements, const_node);
break;
#include <luci/IR/Nodes/CircleMaximum.h>
+#include "ValidateHelpers.h"
+
#include <loco.h>
namespace luci
bool CircleMaximumGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
-
- switch (tensor->type)
- {
- case circle::TensorType_FLOAT16:
- case circle::TensorType_FLOAT32:
- case circle::TensorType_FLOAT64:
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- if (tensors[inputs.at(1)]->type != tensor->type)
- return false;
-
- if (tensors[outputs[0]]->type != tensor->type)
- return false;
-
- return true;
+ return validate_minmax(args);
}
CircleNode *CircleMaximumGraphBuilder::build_node(const circle::OperatorT &,
#include <luci/IR/Nodes/CircleMinimum.h>
+#include "ValidateHelpers.h"
+
#include <loco.h>
namespace luci
bool CircleMinimumGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
-
- switch (tensor->type)
- {
- case circle::TensorType_FLOAT16:
- case circle::TensorType_FLOAT32:
- case circle::TensorType_FLOAT64:
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- if (tensors[inputs.at(1)]->type != tensor->type)
- return false;
-
- if (tensors[outputs[0]]->type != tensor->type)
- return false;
-
- return true;
+ return validate_minmax(args);
}
CircleNode *CircleMinimumGraphBuilder::build_node(const circle::OperatorT &,
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleNonMaxSuppressionV5.h"
+
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV5.h>
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+
+ if (inputs.size() != 6)
+ return false;
+ if (outputs.size() != 3)
+ return false;
+
+ const auto &tensors = args.reader.tensors();
+ const auto &boxes_tensor = tensors.at(inputs[0]);
+ if (boxes_tensor->shape.size() != 2)
+ return false;
+ if (boxes_tensor->shape.at(1) != 4)
+ return false;
+ if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+ return false;
+
+ if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+ return false;
+ if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+ return false;
+ if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+ return false;
+ if (tensors.at(inputs[5])->type != circle::TensorType_FLOAT32)
+ return false;
+
+ return true;
+}
+
+/**
+ * @brief NonMaxSuppressionV5 Node builder
+ *
+ * @note Current loco does not provide multiple outputs
+ * We will create multiple NonMaxSuppressionV5Out nodes to emulate this
+ */
+
+void CircleNonMaxSuppressionV5GraphBuilder::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ auto graph = context->graph();
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto &tensors = context->reader()->tensors();
+ const auto &opcodes = context->reader()->opcodes();
+ auto tensors_ptr = context->reader()->tensors_ptr();
+ assert(tensors_ptr != nullptr);
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
+ }
+
+ // Create CircleNonMaxSuppressionV5
+ auto node = graph->nodes()->create<CircleNonMaxSuppressionV5>();
+ node->boxes(input_nodes[0]);
+ node->scores(input_nodes[1]);
+ node->max_output_size(input_nodes[2]);
+ node->iou_threshold(input_nodes[3]);
+ node->score_threshold(input_nodes[4]);
+ node->soft_nms_sigma(input_nodes[5]);
+
+ assert(outputs.size() == 3);
+ {
+ // Let's use name of output 0 as NonMaxSuppressionV5 name
+ const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ node->name(tensor_name(output_tensor));
+ node->op_version(opcodes[op.opcode_index].get()->version);
+
+ // NOTE We don't set quantization for NonMaxSuppressionV5 itself but for its virtual outputs
+ }
+
+ // Create virtual outputs of NonMaxSuppressionV5
+ for (size_t n = 0; n < outputs.size(); ++n)
+ {
+ const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+ auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV5Out>();
+ copy_tensor_attributes(output_tensor, nodeout);
+
+ // mark shape_status
+ if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+ nodeout->shape_status(ShapeStatus::NOSHAPE);
+ else
+ nodeout->shape_status(ShapeStatus::VALID);
+
+ nodeout->input(node);
+ nodeout->index(n);
+
+ context->nodefinder()->enroll(outputs[n], nodeout);
+ }
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CirclePadV2.h"
+
+#include <luci/IR/Nodes/CirclePadV2.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CirclePadV2GraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 3)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CirclePadV2GraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CirclePadV2>();
+ node->input(inputs[0]);
+ node->paddings(inputs[1]);
+ node->constant_values(inputs[2]);
+
+ const auto *options = op.builtin_options.AsPadV2Options();
+ (void)options; // There are no options.
+
+ return node;
+}
+
+} // namespace luci
#include <luci/IR/Nodes/CircleReduceMax.h>
+#include "ValidateHelpers.h"
+
namespace luci
{
bool CircleReduceMaxGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs.at(1));
-
- switch (tensor_axis->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- return true;
+ return validate_reduce_minmax(args);
}
CircleNode *CircleReduceMaxGraphBuilder::build_node(const circle::OperatorT &op,
#include <luci/IR/Nodes/CircleReduceMin.h>
+#include "ValidateHelpers.h"
+
namespace luci
{
bool CircleReduceMinGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs.at(1));
-
- switch (tensor_axis->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- return true;
+ return validate_reduce_minmax(args);
}
CircleNode *CircleReduceMinGraphBuilder::build_node(const circle::OperatorT &op,
#include <luci/IR/Nodes/CircleSpaceToBatchND.h>
-#include <loco.h>
+#include "ValidateHelpers.h"
-#include <cassert>
+#include <loco.h>
namespace luci
{
bool CircleSpaceToBatchNDGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 3)
- return false;
-
- // input 1 and 2 should have INT32/INT64 type
- const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs.at(1));
- switch (tensor_1->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
- const auto &tensor_2 = tensors.at(inputs.at(2));
- switch (tensor_2->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- // Only support input shape dimension 3 and 4 only
- const auto &tensor_0 = tensors.at(inputs.at(0));
- const auto t_0_s = tensor_0->shape.size();
- if (t_0_s != 3 && t_0_s != 4)
- return false;
-
- // TODO check input shape
-
- return true;
+ return validate_batch_space_nd(args);
}
CircleNode *CircleSpaceToBatchNDGraphBuilder::build_node(const circle::OperatorT &,
node->default_value(inputs.at(3));
const auto *options = op.builtin_options.AsSparseToDenseOptions();
- node->validate_indices(options->validate_indices);
+ if (options)
+ node->validate_indices(options->validate_indices);
return node;
}
bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
+ if (args.op.inputs.size() != 3 && args.op.inputs.size() != 4)
return false;
const auto &inputs = args.op.inputs;
node->inputSizes(inputs.at(0));
node->filter(inputs.at(1));
node->outBackprop(inputs.at(2));
+ if (inputs.size() == 3)
+ node->bias(graph->nodes()->create<CircleOutputExclude>());
+ else
+ node->bias(inputs.at(3));
+
+ if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
+ {
+ // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type
+ // is inserted.
+ bias->dtype(loco::DataType::FLOAT32);
+ }
const auto *options = op.builtin_options.AsTransposeConvOptions();
node->padding(luci_padding(options->padding));
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ValidateHelpers.h"
+
+namespace luci
+{
+
+bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
+{
+ const auto &inputs = args.op.inputs;
+ if (inputs.size() != 3)
+ return false;
+
+ // input 1 and 2 should have INT32/INT64 type
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor_1 = tensors.at(inputs.at(1));
+ switch (tensor_1->type)
+ {
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+ const auto &tensor_2 = tensors.at(inputs.at(2));
+ switch (tensor_2->type)
+ {
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+
+ // Only support input shape dimension 3 and 4
+ const auto &tensor_0 = tensors.at(inputs.at(0));
+ const auto t_0_s = tensor_0->shape.size();
+ if (t_0_s != 3 && t_0_s != 4)
+ return false;
+
+ // TODO check input shape
+
+ return true;
+}
+
+bool validate_minmax(const GraphBuilderBase::ValidateArgs &args)
+{
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+
+ if (inputs.size() != 2)
+ return false;
+
+ if (outputs.size() != 1)
+ return false;
+
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor = tensors.at(inputs.at(0));
+
+ switch (tensor->type)
+ {
+ case circle::TensorType_FLOAT16:
+ case circle::TensorType_FLOAT32:
+ case circle::TensorType_FLOAT64:
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+
+ if (tensors[inputs.at(1)]->type != tensor->type)
+ return false;
+
+ if (tensors[outputs[0]]->type != tensor->type)
+ return false;
+
+ return true;
+}
+
+bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args)
+{
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+
+ if (inputs.size() != 2)
+ return false;
+
+ if (outputs.size() != 1)
+ return false;
+
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor_axis = tensors.at(inputs.at(1));
+
+ switch (tensor_axis->type)
+ {
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VALIDATE_HELPERS_H__
+#define __LUCI_VALIDATE_HELPERS_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+/**
+ * @note Methods in this file provide helper functions to reduce duplicate code
+ */
+
+namespace luci
+{
+
+bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args);
+bool validate_minmax(const GraphBuilderBase::ValidateArgs &args);
+bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args);
+
+} // namespace luci
+
+#endif // __LUCI_VALIDATE_HELPERS_H__
#include "Nodes/CircleMul.h"
#include "Nodes/CircleNeg.h"
#include "Nodes/CircleNonMaxSuppressionV4.h"
+#include "Nodes/CircleNonMaxSuppressionV5.h"
#include "Nodes/CircleNotEqual.h"
#include "Nodes/CircleOneHot.h"
#include "Nodes/CirclePack.h"
#include "Nodes/CircleCustomOut.h"
#include "Nodes/CircleIfOut.h"
#include "Nodes/CircleNonMaxSuppressionV4Out.h"
+#include "Nodes/CircleNonMaxSuppressionV5Out.h"
#include "Nodes/CircleUnpackOut.h"
#include "Nodes/CircleUniqueOut.h"
#include "Nodes/CircleSplitOut.h"
CIRCLE_NODE(MUL, luci::CircleMul)
CIRCLE_NODE(NEG, luci::CircleNeg)
CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, luci::CircleNonMaxSuppressionV4)
+CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, luci::CircleNonMaxSuppressionV5)
CIRCLE_NODE(NOT_EQUAL, luci::CircleNotEqual)
CIRCLE_NODE(ONE_HOT, luci::CircleOneHot)
CIRCLE_NODE(PACK, luci::CirclePack)
CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut)
CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
+CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out)
CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut)
CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+#define __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief NON_MAX_SUPPRESSION_V5 in Circle
+ */
+class CircleNonMaxSuppressionV5 final
+ : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
+{
+public:
+ loco::Node *boxes(void) const { return at(0)->node(); }
+ void boxes(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *scores(void) const { return at(1)->node(); }
+ void scores(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *max_output_size(void) const { return at(2)->node(); }
+ void max_output_size(loco::Node *node) { at(2)->node(node); }
+
+ loco::Node *iou_threshold(void) const { return at(3)->node(); }
+ void iou_threshold(loco::Node *node) { at(3)->node(node); }
+
+ loco::Node *score_threshold(void) const { return at(4)->node(); }
+ void score_threshold(loco::Node *node) { at(4)->node(node); }
+
+ loco::Node *soft_nms_sigma(void) const { return at(5)->node(); }
+ void soft_nms_sigma(loco::Node *node) { at(5)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
+#define __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual NONMAXSUPPRESSIONV5OUT in Circle
+ */
+class CircleNonMaxSuppressionV5Out final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
+{
+public:
+ CircleNonMaxSuppressionV5Out() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ int32_t index(void) const { return _index; }
+ void index(int32_t index) { _index = index; }
+
+private:
+ int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
void validate_indices(bool validate_indices) { _validate_indices = validate_indices; }
private:
- bool _validate_indices{true};
+ bool _validate_indices{false};
};
} // namespace luci
* 'out' acutally means 'out' and 'in' of the this node.
*/
class CircleTransposeConv final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
+ public LuciNodeMixin<LuciNodeTrait::Bias>
{
public:
loco::Node *inputSizes(void) const { return at(0)->node(); }
loco::Node *outBackprop(void) const { return at(2)->node(); }
void outBackprop(Node *node) { at(2)->node(node); }
+ /**
+ * @note "bias" is optional. When this node has no conceptual bias, "bias()" is
+ * expected to be of `luci::CircleOutputExclude` type.
+ *
+ * <Comment on tflite TRANSPOSE_CONV>
+ *
+ * (Circle node has no dependency on tflite, but just for information on converting)
+ * Before TF v2.3.0, tflite TRANSPOSE_CONV didn't support fused bias as argument.
+ * From TF v2.3.0, tflite TRANSPOSE_CONV supports bias as optional 4th argument.
+ *
+ * Ref: https://github.com/tensorflow/tensorflow/commit/43b8f6e710
+ */
+ loco::Node *bias(void) const override { return at(3)->node(); }
+ void bias(loco::Node *node) override { at(3)->node(node); }
+
public:
const Padding &padding(void) const { return _padding; }
void padding(const Padding &padding) { _padding = padding; }
INSTANTIATE(loco::DataType::S64);
INSTANTIATE(loco::DataType::S32);
INSTANTIATE(loco::DataType::S16);
+INSTANTIATE(loco::DataType::S8);
INSTANTIATE(loco::DataType::FLOAT32);
INSTANTIATE(loco::DataType::U8);
INSTANTIATE(loco::DataType::BOOL);
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV5.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV5Test, constructor)
+{
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), nmsv5_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::NON_MAX_SUPPRESSION_V5, nmsv5_node.opcode());
+
+ ASSERT_EQ(nullptr, nmsv5_node.boxes());
+ ASSERT_EQ(nullptr, nmsv5_node.scores());
+ ASSERT_EQ(nullptr, nmsv5_node.max_output_size());
+ ASSERT_EQ(nullptr, nmsv5_node.iou_threshold());
+ ASSERT_EQ(nullptr, nmsv5_node.score_threshold());
+ ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma());
+}
+
+TEST(CircleNonMaxSuppressionV5Test, input_NEG)
+{
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+ luci::CircleNonMaxSuppressionV5 node;
+
+ nmsv5_node.boxes(&node);
+ nmsv5_node.scores(&node);
+ nmsv5_node.max_output_size(&node);
+ nmsv5_node.iou_threshold(&node);
+ nmsv5_node.score_threshold(&node);
+ nmsv5_node.soft_nms_sigma(&node);
+ ASSERT_NE(nullptr, nmsv5_node.boxes());
+ ASSERT_NE(nullptr, nmsv5_node.scores());
+ ASSERT_NE(nullptr, nmsv5_node.max_output_size());
+ ASSERT_NE(nullptr, nmsv5_node.iou_threshold());
+ ASSERT_NE(nullptr, nmsv5_node.score_threshold());
+ ASSERT_NE(nullptr, nmsv5_node.soft_nms_sigma());
+
+ nmsv5_node.boxes(nullptr);
+ nmsv5_node.scores(nullptr);
+ nmsv5_node.max_output_size(nullptr);
+ nmsv5_node.iou_threshold(nullptr);
+ nmsv5_node.score_threshold(nullptr);
+ nmsv5_node.soft_nms_sigma(nullptr);
+ ASSERT_EQ(nullptr, nmsv5_node.boxes());
+ ASSERT_EQ(nullptr, nmsv5_node.scores());
+ ASSERT_EQ(nullptr, nmsv5_node.max_output_size());
+ ASSERT_EQ(nullptr, nmsv5_node.iou_threshold());
+ ASSERT_EQ(nullptr, nmsv5_node.score_threshold());
+ ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma());
+}
+
+TEST(CircleNonMaxSuppressionV5Test, arity_NEG)
+{
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+ ASSERT_NO_THROW(nmsv5_node.arg(5));
+ ASSERT_THROW(nmsv5_node.arg(6), std::out_of_range);
+}
+
+TEST(CircleNonMaxSuppressionV5Test, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(nmsv5_node.accept(&tv), std::exception);
+}
+
+TEST(CircleNonMaxSuppressionV5Test, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(nmsv5_node.accept(&tv), std::exception);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV5OutTest, constructor)
+{
+ luci::CircleNonMaxSuppressionV5Out vout_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), vout_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT, vout_node.opcode());
+
+ ASSERT_EQ(nullptr, vout_node.input());
+ ASSERT_EQ(-1, vout_node.index());
+}
ASSERT_EQ(nullptr, stb_node.values());
ASSERT_EQ(nullptr, stb_node.default_value());
- ASSERT_EQ(true, stb_node.validate_indices());
+ ASSERT_EQ(false, stb_node.validate_indices());
}
TEST(CircleSparseToDenseTest, input_NEG)
{
luci::CircleTransposeConv trc_node;
- ASSERT_NO_THROW(trc_node.arg(2));
- ASSERT_THROW(trc_node.arg(3), std::out_of_range);
+ ASSERT_NO_THROW(trc_node.arg(3));
+ ASSERT_THROW(trc_node.arg(4), std::out_of_range);
}
TEST(CircleTransposeConvTest, visit_mutable_NEG)
IMPLEMENT(luci::CircleMul)
IMPLEMENT(luci::CircleNeg)
IMPLEMENT(luci::CircleNonMaxSuppressionV4)
+ IMPLEMENT(luci::CircleNonMaxSuppressionV5)
IMPLEMENT(luci::CircleNotEqual)
IMPLEMENT(luci::CircleOneHot)
IMPLEMENT(luci::CirclePack)
IMPLEMENT(luci::CirclePad)
+ IMPLEMENT(luci::CirclePadV2)
IMPLEMENT(luci::CirclePow)
IMPLEMENT(luci::CirclePRelu)
IMPLEMENT(luci::CircleRange)
IMPLEMENT(luci::CircleOutput)
IMPLEMENT(luci::CircleIfOut)
IMPLEMENT(luci::CircleNonMaxSuppressionV4Out)
+ IMPLEMENT(luci::CircleNonMaxSuppressionV5Out)
IMPLEMENT(luci::CircleSplitOut)
IMPLEMENT(luci::CircleSplitVOut)
IMPLEMENT(luci::CircleTopKV2Out)
return true;
}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAddN *node,
+ locop::NodeSummary &s)
+{
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ s.args().append("inputs", tbl->lookup(node->inputs(i)));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAveragePool2D *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("value", tbl->lookup(node->value()));
+ s.args().append("filter(h,w)", to_str(node->filter()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchMatMul *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("x", tbl->lookup(node->x()));
+ s.args().append("y", tbl->lookup(node->y()));
+ s.args().append("adj_x", to_str(node->adj_x()));
+ s.args().append("adj_y", to_str(node->adj_y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchToSpaceND *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("block_shape", tbl->lookup(node->block_shape()));
+ s.args().append("crops", tbl->lookup(node->crops()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("x", tbl->lookup(node->x()));
+ s.args().append("in_data_type", to_str(node->in_data_type()));
+ s.args().append("out_data_type", to_str(node->out_data_type()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConcatenation *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ s.args().append("values", tbl->lookup(node->values(i)));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConv2D *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+ assert(node->padding() != luci::Padding::UNDEFINED);
+
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("filter", tbl->lookup(node->filter()));
+ s.args().append("bias", tbl->lookup(node->bias()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("dilation(h,w)", to_str(node->dilation()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCustom *node,
+ locop::NodeSummary &s)
+{
+ for (uint32_t i = 0; i < node->numInputs(); i++)
+ {
+ s.args().append("input" + std::to_string(i), tbl->lookup(node->inputs(i)));
+ }
+ s.args().append("custom_code", node->custom_code());
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthToSpace *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("block_size", std::to_string(node->block_size()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthwiseConv2D *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+ assert(node->padding() != luci::Padding::UNDEFINED);
+
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("filter", tbl->lookup(node->filter()));
+ s.args().append("bias", tbl->lookup(node->bias()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("dilation(h,w)", to_str(node->dilation()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleExpandDims *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("axis", tbl->lookup(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("dims", tbl->lookup(node->dims()));
+ s.args().append("value", tbl->lookup(node->value()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFullyConnected *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("weights", tbl->lookup(node->weights()));
+ s.args().append("bias", tbl->lookup(node->bias()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGather *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("params", tbl->lookup(node->params()));
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGatherNd *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("params", tbl->lookup(node->params()));
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleIf *node, locop::NodeSummary &s)
+{
+ s.args().append("cond", tbl->lookup(node->cond()));
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ s.args().append("input", tbl->lookup(node->input(i)));
+
+ if (node->then_graph() != nullptr)
+ s.args().append("then_graph", node->then_graph()->name());
+ else
+ s.args().append("then_branch", pepper::str(node->then_branch()));
+
+ if (node->else_graph() != nullptr)
+ s.args().append("else_graph", node->else_graph()->name());
+ else
+ s.args().append("else_branch", pepper::str(node->else_branch()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Normalize *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("x", tbl->lookup(node->x()));
+ s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLeakyRelu *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("features", tbl->lookup(node->features()));
+ s.args().append("alpha", std::to_string(node->alpha()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLocalResponseNormalization *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("radius", pepper::str(node->radius()));
+ s.args().append("bias", pepper::str(node->bias()));
+ s.args().append("alpha", pepper::str(node->alpha()));
+ s.args().append("beta", pepper::str(node->beta()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLogSoftmax *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("logits", tbl->lookup(node->logits()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixDiag *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("diagonal", tbl->lookup(node->diagonal()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixSetDiag *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("diagonal", tbl->lookup(node->diagonal()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMaxPool2D *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("value", tbl->lookup(node->value()));
+ s.args().append("filter(h,w)", to_str(node->filter()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMirrorPad *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("paddings", tbl->lookup(node->paddings()));
+ s.args().append("mode", to_str(node->mode()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV4 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("boxes", tbl->lookup(node->boxes()));
+ s.args().append("scores", tbl->lookup(node->scores()));
+ s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
+ s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
+ s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV5 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("boxes", tbl->lookup(node->boxes()));
+ s.args().append("scores", tbl->lookup(node->scores()));
+ s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
+ s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
+ s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
+ s.args().append("soft_nms_sigma", tbl->lookup(node->soft_nms_sigma()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOneHot *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("depth", tbl->lookup(node->depth()));
+ s.args().append("on_value", tbl->lookup(node->on_value()));
+ s.args().append("off_value", tbl->lookup(node->off_value()));
+ s.args().append("axis", pepper::str(node->axis()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePack *node,
+ locop::NodeSummary &s)
+{
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ s.args().append("values", tbl->lookup(node->values(i)));
+ s.args().append("values_count", pepper::str(node->values_count()));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePad *node, locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("paddings", tbl->lookup(node->paddings()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePadV2 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("paddings", tbl->lookup(node->paddings()));
+ s.args().append("constant_values", tbl->lookup(node->constant_values()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePRelu *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("alpha", tbl->lookup(node->alpha()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleRange *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("start", tbl->lookup(node->start()));
+ s.args().append("limit", tbl->lookup(node->limit()));
+ s.args().append("delta", tbl->lookup(node->delta()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReshape *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("tensor", tbl->lookup(node->tensor()));
+ s.args().append("shape", tbl->lookup(node->shape()));
+ // TODO Show newShape info
+ s.state(locop::NodeSummary::State::PartiallyKnown);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeBilinear *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("size", tbl->lookup(node->size()));
+ s.args().append("align_corners", node->align_corners() ? "true" : "false");
+ s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeNearestNeighbor *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("size", tbl->lookup(node->size()));
+ s.args().append("align_corners", node->align_corners() ? "true" : "false");
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseSequence *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("seq_lengths", tbl->lookup(node->seq_lengths()));
+ s.args().append("seq_axis", std::to_string(node->seq_axis()));
+ s.args().append("batch_axis", std::to_string(node->batch_axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseV2 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("tensor", tbl->lookup(node->tensor()));
+ s.args().append("axis", tbl->lookup(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleScatterNd *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("updates", tbl->lookup(node->updates()));
+ s.args().append("shape", tbl->lookup(node->shape()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSegmentSum *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("segment_ids", tbl->lookup(node->segment_ids()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelect *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("condition", tbl->lookup(node->condition()));
+ s.args().append("t", tbl->lookup(node->t()));
+ s.args().append("e", tbl->lookup(node->e()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelectV2 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("condition", tbl->lookup(node->condition()));
+ s.args().append("t", tbl->lookup(node->t()));
+ s.args().append("e", tbl->lookup(node->e()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleShape *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("out_type", to_str(node->out_type()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSlice *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("begin", tbl->lookup(node->begin()));
+ s.args().append("size", tbl->lookup(node->size()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSoftmax *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("logits", tbl->lookup(node->logits()));
+ s.args().append("beta", pepper::str(node->beta()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToBatchND *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("block_shape", tbl->lookup(node->block_shape()));
+ s.args().append("paddings", tbl->lookup(node->paddings()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToDepth *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("block_size", pepper::str(node->block_size()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSparseToDense *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("output_shape", tbl->lookup(node->output_shape()));
+ s.args().append("values", tbl->lookup(node->values()));
+ s.args().append("default_value", tbl->lookup(node->default_value()));
+ s.args().append("Validate_indices", pepper::str(node->validate_indices()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplit *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("split_dim", tbl->lookup(node->split_dim()));
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("num_split", pepper::str(node->num_split()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplitV *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("size_splits", tbl->lookup(node->size_splits()));
+ s.args().append("split_dim", tbl->lookup(node->split_dim()));
+ s.args().append("num_split", pepper::str(node->num_split()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSqueeze *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+
+ std::stringstream ss{"("};
+ for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
+ {
+ if (i != 0)
+ ss << ", ";
+ ss << node->squeeze_dims()[i];
+ }
+ ss << ")";
+ s.args().append("squeeze_dims", ss.str());
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleStridedSlice *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("begin", tbl->lookup(node->begin()));
+ s.args().append("end", tbl->lookup(node->end()));
+ s.args().append("strides", tbl->lookup(node->strides()));
+ s.args().append("begin_mask", pepper::str(node->begin_mask()));
+ s.args().append("end_mask", pepper::str(node->end_mask()));
+ s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
+ s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
+ s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTile *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("multiples", tbl->lookup(node->multiples()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("k", tbl->lookup(node->k()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTranspose *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("a", tbl->lookup(node->a()));
+ s.args().append("perm", tbl->lookup(node->perm()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTransposeConv *node,
+ locop::NodeSummary &s)
+{
+ assert(node->padding() != luci::Padding::UNDEFINED);
+
+ s.args().append("inputSizes", tbl->lookup(node->inputSizes()));
+ s.args().append("filter", tbl->lookup(node->filter()));
+ s.args().append("outBackprop", tbl->lookup(node->outBackprop()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnique *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("idx_out_type", to_str(node->idx_out_type()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpack *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("value", tbl->lookup(node->value()));
+ s.args().append("num", pepper::str(node->num()));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhere *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("condition", tbl->lookup(node->condition()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhile *node,
+ locop::NodeSummary &s)
+{
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ s.args().append("input", tbl->lookup(node->input(i)));
+
+ if (node->cond_graph() != nullptr)
+ s.args().append("cond_graph", node->cond_graph()->name());
+ else
+ s.args().append("cond_branch", pepper::str(node->cond_branch()));
+
+ if (node->body_graph() != nullptr)
+ s.args().append("body_graph", node->body_graph()->name());
+ else
+ s.args().append("body_branch", pepper::str(node->body_branch()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2Out *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("topkv2", tbl->lookup(node->input()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUniqueOut *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("unique", tbl->lookup(node->input()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpackOut *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("unpack", tbl->lookup(node->input()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhileOut *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("while", tbl->lookup(node->input()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOutput *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("from", tbl->lookup(node->from()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQFullyConnected *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("weights_scales", tbl->lookup(node->weights_scales()));
+ s.args().append("weights_binary", tbl->lookup(node->weights_binary()));
+ s.args().append("bias", tbl->lookup(node->bias()));
+ s.args().append("weights_clusters", tbl->lookup(node->weights_clusters()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQGather *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input_scales", tbl->lookup(node->input_scales()));
+ s.args().append("input_binary", tbl->lookup(node->input_binary()));
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("input_clusters", tbl->lookup(node->input_clusters()));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleInstanceNorm *node,
+ locop::NodeSummary &s)
+{
+ auto fused = node->fusedActivationFunction();
+ assert(fused != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("gamma", tbl->lookup(node->gamma()));
+ s.args().append("beta", tbl->lookup(node->beta()));
+ s.args().append("epsilon", pepper::str(node->epsilon()));
+ s.args().append("fused_activation_function", to_str(fused));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
{
if (node->dialect() != luci::CircleDialect::get())
bool CircleNodeSummaryBuilder::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const
{
- for (uint32_t i = 0; i < node->arity(); ++i)
- s.args().append("inputs", tbl()->lookup(node->inputs(i)));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node,
locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("value", tbl()->lookup(node->value()));
- s.args().append("filter(h,w)", to_str(node->filter()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchMatMul *node,
locop::NodeSummary &s) const
{
- s.args().append("x", tbl()->lookup(node->x()));
- s.args().append("y", tbl()->lookup(node->y()));
- s.args().append("adj_x", to_str(node->adj_x()));
- s.args().append("adj_y", to_str(node->adj_y()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("block_shape", tbl()->lookup(node->block_shape()));
- s.args().append("crops", tbl()->lookup(node->crops()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
{
- s.args().append("x", tbl()->lookup(node->x()));
- s.args().append("in_data_type", to_str(node->in_data_type()));
- s.args().append("out_data_type", to_str(node->out_data_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node,
locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- for (uint32_t i = 0; i < node->numValues(); ++i)
- s.args().append("values", tbl()->lookup(node->values(i)));
- s.args().append("axis", pepper::str(node->axis()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("filter", tbl()->lookup(node->filter()));
- s.args().append("bias", tbl()->lookup(node->bias()));
-
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("dilation(h,w)", to_str(node->dilation()));
-
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleCustom *node, locop::NodeSummary &s) const
{
- for (uint32_t i = 0; i < node->numInputs(); i++)
- {
- s.args().append("input" + std::to_string(i), tbl()->lookup(node->inputs(i)));
- }
- s.args().append("custom_code", node->custom_code());
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthToSpace *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("block_size", std::to_string(node->block_size()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node,
locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("filter", tbl()->lookup(node->filter()));
- s.args().append("bias", tbl()->lookup(node->bias()));
-
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("dilation(h,w)", to_str(node->dilation()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleExpandDims *node,
locop::NodeSummary &s) const
-{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("axis", tbl()->lookup(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+{
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
{
- s.args().append("dims", tbl()->lookup(node->dims()));
- s.args().append("value", tbl()->lookup(node->value()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleFullyConnected *node,
locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("weights", tbl()->lookup(node->weights()));
- s.args().append("bias", tbl()->lookup(node->bias()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleGather *node, locop::NodeSummary &s) const
{
- s.args().append("params", tbl()->lookup(node->params()));
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("axis", pepper::str(node->axis()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleGatherNd *node,
locop::NodeSummary &s) const
{
- s.args().append("params", tbl()->lookup(node->params()));
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleGreater *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleIf *node, locop::NodeSummary &s) const
{
- s.args().append("cond", tbl()->lookup(node->cond()));
- for (uint32_t i = 0; i < node->input_count(); ++i)
- s.args().append("input", tbl()->lookup(node->input(i)));
-
- if (node->then_graph() != nullptr)
- s.args().append("then_graph", node->then_graph()->name());
- else
- s.args().append("then_branch", pepper::str(node->then_branch()));
-
- if (node->else_graph() != nullptr)
- s.args().append("else_graph", node->else_graph()->name());
- else
- s.args().append("else_branch", pepper::str(node->else_branch()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleL2Normalize *node,
locop::NodeSummary &s) const
{
- s.args().append("x", tbl()->lookup(node->x()));
- s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleLess *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleLeakyRelu *node,
locop::NodeSummary &s) const
{
- s.args().append("features", tbl()->lookup(node->features()));
- s.args().append("alpha", std::to_string(node->alpha()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleLocalResponseNormalization *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("radius", pepper::str(node->radius()));
- s.args().append("bias", pepper::str(node->bias()));
- s.args().append("alpha", pepper::str(node->alpha()));
- s.args().append("beta", pepper::str(node->beta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleLog *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleLogSoftmax *node,
locop::NodeSummary &s) const
{
- s.args().append("logits", tbl()->lookup(node->logits()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixDiag *node,
locop::NodeSummary &s) const
{
- s.args().append("diagonal", tbl()->lookup(node->diagonal()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixSetDiag *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("diagonal", tbl()->lookup(node->diagonal()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleMaxPool2D *node,
locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("value", tbl()->lookup(node->value()));
- s.args().append("filter(h,w)", to_str(node->filter()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMean *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleMirrorPad *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("paddings", tbl()->lookup(node->paddings()));
- s.args().append("mode", to_str(node->mode()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMul *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4 *node,
locop::NodeSummary &s) const
{
- s.args().append("boxes", pepper::str(node->boxes()));
- s.args().append("scores", pepper::str(node->scores()));
- s.args().append("max_output_size", pepper::str(node->max_output_size()));
- s.args().append("iou_threshold", pepper::str(node->iou_threshold()));
- s.args().append("score_threshold", pepper::str(node->score_threshold()));
+ return summary_node(tbl(), node, s);
+}
- s.state(locop::NodeSummary::State::Complete);
- return true;
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5 *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
bool CircleNodeSummaryBuilder::summary(const luci::CircleOneHot *node, locop::NodeSummary &s) const
{
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("depth", tbl()->lookup(node->depth()));
- s.args().append("on_value", tbl()->lookup(node->on_value()));
- s.args().append("off_value", tbl()->lookup(node->off_value()));
- s.args().append("axis", pepper::str(node->axis()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CirclePack *node, locop::NodeSummary &s) const
{
- for (uint32_t i = 0; i < node->values_count(); ++i)
- s.args().append("values", tbl()->lookup(node->values(i)));
- s.args().append("values_count", pepper::str(node->values_count()));
- s.args().append("axis", pepper::str(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CirclePad *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("paddings", tbl()->lookup(node->paddings()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CirclePadV2 *node, locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CirclePRelu *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("alpha", tbl()->lookup(node->alpha()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleRange *node, locop::NodeSummary &s) const
{
- s.args().append("start", tbl()->lookup(node->start()));
- s.args().append("limit", tbl()->lookup(node->limit()));
- s.args().append("delta", tbl()->lookup(node->delta()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleRank *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleReshape *node, locop::NodeSummary &s) const
{
- s.args().append("tensor", tbl()->lookup(node->tensor()));
- s.args().append("shape", tbl()->lookup(node->shape()));
- // TODO Show newShape info
- s.state(locop::NodeSummary::State::PartiallyKnown);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeBilinear *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("size", tbl()->lookup(node->size()));
- s.args().append("align_corners", node->align_corners() ? "true" : "false");
- s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeNearestNeighbor *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("size", tbl()->lookup(node->size()));
- s.args().append("align_corners", node->align_corners() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseSequence *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("seq_lengths", tbl()->lookup(node->seq_lengths()));
- s.args().append("seq_axis", std::to_string(node->seq_axis()));
- s.args().append("batch_axis", std::to_string(node->batch_axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseV2 *node,
locop::NodeSummary &s) const
{
- s.args().append("tensor", tbl()->lookup(node->tensor()));
- s.args().append("axis", tbl()->lookup(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleRound *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleScatterNd *node,
locop::NodeSummary &s) const
{
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("updates", tbl()->lookup(node->updates()));
- s.args().append("shape", tbl()->lookup(node->shape()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSegmentSum *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("segment_ids", tbl()->lookup(node->segment_ids()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSelect *node, locop::NodeSummary &s) const
{
- s.args().append("condition", tbl()->lookup(node->condition()));
- s.args().append("t", tbl()->lookup(node->t()));
- s.args().append("e", tbl()->lookup(node->e()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSelectV2 *node,
locop::NodeSummary &s) const
{
- s.args().append("condition", tbl()->lookup(node->condition()));
- s.args().append("t", tbl()->lookup(node->t()));
- s.args().append("e", tbl()->lookup(node->e()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleShape *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("out_type", to_str(node->out_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleSlice *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("begin", tbl()->lookup(node->begin()));
- s.args().append("size", tbl()->lookup(node->size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSoftmax *node, locop::NodeSummary &s) const
{
- s.args().append("logits", tbl()->lookup(node->logits()));
- s.args().append("beta", pepper::str(node->beta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToBatchND *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("block_shape", tbl()->lookup(node->block_shape()));
- s.args().append("paddings", tbl()->lookup(node->paddings()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToDepth *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("block_size", pepper::str(node->block_size()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSparseToDense *node,
locop::NodeSummary &s) const
{
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("output_shape", tbl()->lookup(node->output_shape()));
- s.args().append("values", tbl()->lookup(node->values()));
- s.args().append("default_value", tbl()->lookup(node->default_value()));
-
- s.args().append("Validate_indices", pepper::str(node->validate_indices()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSplit *node, locop::NodeSummary &s) const
{
- s.args().append("split_dim", tbl()->lookup(node->split_dim()));
- s.args().append("input", tbl()->lookup(node->input()));
-
- s.args().append("num_split", pepper::str(node->num_split()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitV *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("size_splits", tbl()->lookup(node->size_splits()));
- s.args().append("split_dim", tbl()->lookup(node->split_dim()));
-
- s.args().append("num_split", pepper::str(node->num_split()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleSqueeze *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
-
- std::stringstream ss{"("};
- for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
- {
- if (i != 0)
- ss << ", ";
- ss << node->squeeze_dims()[i];
- }
- ss << ")";
-
- s.args().append("squeeze_dims", ss.str());
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleStridedSlice *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("begin", tbl()->lookup(node->begin()));
- s.args().append("end", tbl()->lookup(node->end()));
- s.args().append("strides", tbl()->lookup(node->strides()));
-
- s.args().append("begin_mask", pepper::str(node->begin_mask()));
- s.args().append("end_mask", pepper::str(node->end_mask()));
- s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
- s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
- s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSub *node, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleTile *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("multiples", tbl()->lookup(node->multiples()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2 *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("k", tbl()->lookup(node->k()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleTranspose *node,
locop::NodeSummary &s) const
{
- s.args().append("a", tbl()->lookup(node->a()));
- s.args().append("perm", tbl()->lookup(node->perm()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleTransposeConv *node,
locop::NodeSummary &s) const
{
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("inputSizes", tbl()->lookup(node->inputSizes()));
- s.args().append("filter", tbl()->lookup(node->filter()));
- s.args().append("outBackprop", tbl()->lookup(node->outBackprop()));
-
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnique *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("idx_out_type", to_str(node->idx_out_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpack *node, locop::NodeSummary &s) const
{
- s.args().append("value", tbl()->lookup(node->value()));
-
- s.args().append("num", pepper::str(node->num()));
- s.args().append("axis", pepper::str(node->axis()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleWhere *node, locop::NodeSummary &s) const
{
- s.args().append("condition", tbl()->lookup(node->condition()));
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleWhile *node, locop::NodeSummary &s) const
{
- for (uint32_t i = 0; i < node->input_count(); ++i)
- s.args().append("input", tbl()->lookup(node->input(i)));
-
- if (node->cond_graph() != nullptr)
- s.args().append("cond_graph", node->cond_graph()->name());
- else
- s.args().append("cond_branch", pepper::str(node->cond_branch()));
-
- if (node->body_graph() != nullptr)
- s.args().append("body_graph", node->body_graph()->name());
- else
- s.args().append("body_branch", pepper::str(node->body_branch()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleZerosLike *node,
bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2Out *node,
locop::NodeSummary &s) const
{
- s.args().append("topkv2", tbl()->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleUniqueOut *node,
locop::NodeSummary &s) const
{
- s.args().append("unique", tbl()->lookup(node->input()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpackOut *node,
locop::NodeSummary &s) const
{
- s.args().append("unpack", tbl()->lookup(node->input()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleIfOut *node, locop::NodeSummary &s) const
return use_input(tbl(), node, s);
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5Out *node,
locop::NodeSummary &s) const
{
- s.args().append("while", tbl()->lookup(node->input()));
-
- s.state(locop::NodeSummary::State::Complete);
+ return use_input(tbl(), node, s);
+}
- return true;
+bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSummary &s) const
bool CircleNodeSummaryBuilder::summary(const luci::CircleOutput *node, locop::NodeSummary &s) const
{
- s.args().append("from", tbl()->lookup(node->from()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQFullyConnected *node,
locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("weights_scales", tbl()->lookup(node->weights_scales()));
- s.args().append("weights_binary", tbl()->lookup(node->weights_binary()));
- s.args().append("bias", tbl()->lookup(node->bias()));
- s.args().append("weights_clusters", tbl()->lookup(node->weights_clusters()));
-
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQGather *node,
locop::NodeSummary &s) const
{
- s.args().append("input_scales", tbl()->lookup(node->input_scales()));
- s.args().append("input_binary", tbl()->lookup(node->input_binary()));
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("input_clusters", tbl()->lookup(node->input_clusters()));
-
- s.args().append("axis", pepper::str(node->axis()));
- s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleInstanceNorm *node,
locop::NodeSummary &s) const
{
- auto fused = node->fusedActivationFunction();
- assert(fused != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("gamma", tbl()->lookup(node->gamma()));
- s.args().append("beta", tbl()->lookup(node->beta()));
- s.args().append("epsilon", pepper::str(node->epsilon()));
- s.args().append("fused_activation_function", to_str(fused));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
} // namespace
{
enum Algorithm
{
+ FuseBatchNormWithTConv,
FuseBCQ,
FuseInstanceNorm,
ResolveCustomOpAdd,
ResolveCustomOpMatMul,
QuantizeDequantizeWeights,
QuantizeWithMinMax,
+ Requantize,
};
enum AlgorithmParameters
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleTransposeConv
+ */
+struct FuseBatchNormWithTConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithTConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
+#ifndef __LUCI_REQUANTIZE_PASS_H__
+#define __LUCI_REQUANTIZE_PASS_H__
-#include <backend/IPortableTensor.h>
+#include <loco.h>
-#include <exec/IFunction.h>
+#include <logo/Pass.h>
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
+#include <luci/Pass/QuantizationParameters.h>
+
+namespace luci
{
-class ReLULayer : public ::onert::exec::IFunction
+/**
+ * @brief Pass to requantize already-quantized tensors to another data type (e.g. int8 -> uint8)
+ */
+class RequantizePass : public logo::Pass
{
public:
- ReLULayer();
+ RequantizePass(loco::DataType input_dtype, loco::DataType output_dtype)
+ : _input_dtype{input_dtype}, _output_dtype{output_dtype}
+ {
+ // DO NOTHING
+ }
+ virtual const char *name(void) const { return "luci::RequantizePass"; }
public:
- void reluFloat32();
-
- void reluQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
+ bool run(loco::Graph *graph);
private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
+ loco::DataType _input_dtype;
+ loco::DataType _output_dtype;
};
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+} // namespace luci
-#endif // __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
+#endif //__LUCI_REQUANTIZE_PASS_H__
#include "luci/CircleOptimizer.h"
+#include "luci/Pass/FuseBatchNormWithTConv.h"
#include "luci/Pass/FuseBCQPass.h"
#include "luci/Pass/FuseInstanceNormPass.h"
#include "luci/Pass/ResolveCustomOpAddPass.h"
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMatMulPass.h"
+#include "luci/Pass/RequantizePass.h"
#include "luci/Pass/QuantizeWithMinMaxPass.h"
#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
// TODO add more passes
#include "ProgressReporter.h"
#include "CircleOptimizerUtils.h"
+#include <luci/IR/CircleNodes.h>
#include <logo/Phase.h>
#include <memory>
{
phase.emplace_back(std::make_unique<FuseBCQPass>());
}
+ if (_options->query(Options::Algorithm::FuseBatchNormWithTConv))
+ {
+ phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
+ }
// Shape inference is needed for added nodes doing above transformations
phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
to_string(fakeq_supported_granularity));
+ // Clear existing quantparams before doing fake quantization
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->quantparam() != nullptr)
+ circle_node->quantparam(nullptr);
+ }
+
luci::QuantizeDequantizeWeightsPass fake_quantizer(
str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
fake_quantizer.run(g);
quantizer.run(g);
}
+ // Requantize
+ if (_options->query(Options::Algorithm::Requantize))
+ {
+ static const std::vector<std::string> rq_supported_input_dtype{"int8"};
+ static const std::vector<std::string> rq_supported_output_dtype{"uint8"};
+
+ auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
+ auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+
+ if (!in_array(to_lower_case(input_dtype), rq_supported_input_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(rq_supported_input_dtype));
+
+ if (!in_array(to_lower_case(output_dtype), rq_supported_output_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(rq_supported_output_dtype));
+
+ luci::RequantizePass requantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype));
+ requantizer.run(g);
+ }
+
logo::Phase phase;
// Do Shape/Type inference
{
std::string prefix = node_name;
- if (prefix.find("ReadVariableOp/resource/") != std::string::npos)
+ if (prefix.find("/ReadVariableOp/resource") != std::string::npos)
{
- const auto start_index = prefix.find("ReadVariableOp/resource/");
+ const auto start_index = prefix.find("/ReadVariableOp/resource");
const auto left_prefix = prefix.substr(0, start_index);
const auto right_prefix = prefix.substr(start_index + 24);
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConv.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+/**
+ * NOTE TF's fusedBatchNorm is converted to mul and add of Circle.
+ *
+ * BEFORE
+ *
+ * [CircleTransposeConv]
+ * |
+ * [mul]
+ * |
+ * [add]
+ * AFTER
+ *
+ * [CircleTransposeConv]
+ */
+bool fused_batch_norm_with_tconv(luci::CircleTransposeConv *tconv)
+{
+ // check whether it has bias or not. This optimization works only if it doesn't.
+ auto bias = dynamic_cast<luci::CircleOutputExclude *>(tconv->bias());
+ if (not bias)
+ return false;
+
+ // get weight of tconv
+ auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
+ if (not filter)
+ return false;
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // get mul node
+ auto tconv_output = loco::succs(tconv);
+ assert(tconv_output.size() == 1);
+ auto mul = dynamic_cast<luci::CircleMul *>(*tconv_output.begin());
+ if (not mul)
+ return false;
+ if (mul->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // get add node
+ auto mul_output = loco::succs(mul);
+ assert(mul_output.size() == 1);
+ auto add = dynamic_cast<luci::CircleAdd *>(*mul_output.begin());
+ if (not add)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+ return false;
+
+ // get scale of batchnorm
+ auto scale = dynamic_cast<luci::CircleConst *>(mul->y());
+ if (not scale)
+ return false;
+
+ // scale dim(0) == tconv filter channel dim
+ if (filter->rank() != 4)
+ return false;
+ auto filter_channel_dim = filter->dim(3).value();
+ if (scale->rank() != 1)
+ return false;
+ auto scale_dim = scale->dim(0).value();
+ if (filter_channel_dim != scale_dim)
+ return false;
+
+ // get shift of batchnorm
+ auto shift = dynamic_cast<luci::CircleConst *>(add->y());
+ if (not shift)
+ return false;
+
+ // shift dim(0) == tconv filter channel dim
+ if (shift->rank() != 1)
+ return false;
+ auto shift_dim = shift->dim(0).value();
+ if (filter_channel_dim != shift_dim)
+ return false;
+
+ // scale the filter weights in place (filter *= scale); shift is fused as the bias below
+ uint32_t filter_batch_dim = filter->dim(0).value();
+ uint32_t filter_height_dim = filter->dim(1).value();
+ uint32_t filter_width_dim = filter->dim(2).value();
+ for (uint32_t c = 0; c < filter_channel_dim; c++)
+ {
+ for (uint32_t n = 0; n < filter_batch_dim; n++)
+ {
+ for (uint32_t h = 0; h < filter_height_dim; h++)
+ {
+ for (uint32_t w = 0; w < filter_width_dim; w++)
+ {
+ uint32_t offset = n * filter_height_dim * filter_width_dim * filter_channel_dim +
+ h * filter_width_dim * filter_channel_dim + w * filter_channel_dim + c;
+ filter->at<loco::DataType::FLOAT32>(offset) *= scale->at<loco::DataType::FLOAT32>(c);
+ }
+ }
+ }
+ }
+
+ // fuse shift with transposed conv
+ tconv->bias(shift);
+
+ if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
+ {
+ // separate relu op from add op
+ auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
+ relu->features(tconv);
+
+ // replace the add node with relu; the mul/add pair becomes unreferenced
+ replace(add).with(relu);
+ }
+ else
+ {
+ replace(add).with(tconv);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto tconv = dynamic_cast<luci::CircleTransposeConv *>(node);
+ if (not tconv)
+ continue;
+
+ changed |= fused_batch_norm_with_tconv(tconv);
+ }
+
+ return changed;
+}
+
+} // namespace luci
if (granularity == QuantizationGranularity::ChannelWise)
{
auto quantparam = circle_node->quantparam();
- assert(quantparam != nullptr);
+ if (quantparam == nullptr)
+ {
+ assert(false && "quantparam is nullptr");
+ return false;
+ }
+
auto min = quantparam->min;
auto scaling_factor = quantparam->scale;
int32_t channel_dim_index = 0;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RequantizePass.h"
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <oops/UserExn.h>
+
+#include <iostream>
+#include <cmath>
+
+namespace luci
+{
+
+namespace
+{
+
+// Check if the node is the bias of Conv2D, DepthwiseConv2D, or FullyConnected layer
+bool is_bias(CircleConst *node)
+{
+ if (node == nullptr)
+ return false;
+
+ auto succs = loco::succs(node);
+ if (succs.size() != 1) // assume bias is used by only one node
+ return false;
+
+ for (auto out : succs)
+ {
+ auto conv = dynamic_cast<CircleConv2D *>(out);
+ if (conv != nullptr && conv->bias() == node)
+ return true;
+
+ auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
+ if (dw_conv != nullptr && dw_conv->bias() == node)
+ return true;
+
+ auto fc = dynamic_cast<CircleFullyConnected *>(out);
+ if (fc != nullptr && fc->bias() == node)
+ return true;
+
+ // TODO: add TransposeConv when bias is supported in CircleTransposeConv
+ }
+ return false;
+}
+
+void requant_nonconst_int8_to_uint8(CircleNode *circle_node)
+{
+ assert(circle_node->dtype() == loco::DataType::S8);
+
+ auto quantparam = circle_node->quantparam();
+ assert(quantparam != nullptr);
+ for (size_t i = 0; i < quantparam->zerop.size(); ++i)
+ {
+ quantparam->zerop[i] += 128;
+ }
+ circle_node->dtype(loco::DataType::U8);
+}
+
+// Requantize CircleConst from symmetric int8 to asymmetric uint8
+// Original values: -127 ~ 127
+// After requantization: 1 ~ 255 (zp <- zp + 128)
+void requant_const_int8_to_uint8(CircleConst *node)
+{
+ assert(node->dtype() == loco::DataType::S8);
+
+ uint32_t size = node->size<loco::DataType::S8>();
+ std::vector<int32_t> requantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ int32_t data = node->at<loco::DataType::S8>(i);
+ requantized_values[i] = data + 128;
+ }
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ assert(1 <= requantized_values[i] && requantized_values[i] <= 255);
+ node->at<loco::DataType::U8>(i) = requantized_values[i];
+ }
+
+ auto quantparam = node->quantparam();
+ assert(quantparam != nullptr);
+ for (size_t i = 0; i < quantparam->zerop.size(); ++i)
+ {
+ quantparam->zerop[i] += 128;
+ }
+}
+
+/**
+ * @brief RequantizeNonConst requantizes tensors for activations
+ */
+struct RequantizeNonConst final : public luci::CircleNodeMutableVisitor<bool>
+{
+ RequantizeNonConst(loco::DataType input, loco::DataType output)
+ : _input_type(input), _output_type(output)
+ {
+ }
+
+ loco::DataType _input_type;
+ loco::DataType _output_type;
+
+ // Requantize input tensors of each node
+ bool visit(luci::CircleNode *node)
+ {
+ LOGGER(l);
+ INFO(l) << "RequantizeNonConst visit node: " << node->name() << std::endl;
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ auto input_node = node->arg(i);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+
+ // Check if this was quantized (only quantized tensors are requantized)
+ if (circle_node->quantparam() == nullptr)
+ continue;
+
+ // Check if this is already requantized
+ if (circle_node->dtype() == _output_type)
+ continue;
+
+ // Check if this is not const (only non-const is requantized in this function)
+ auto circle_const = dynamic_cast<CircleConst *>(circle_node);
+ if (circle_const != nullptr)
+ continue;
+
+ if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8)
+ requant_nonconst_int8_to_uint8(circle_node);
+ }
+ return false;
+ }
+};
+
+/**
+ * @brief RequantizeConst requantizes tensors for weights
+ */
+struct RequantizeConst final : public luci::CircleNodeMutableVisitor<bool>
+{
+ RequantizeConst(loco::DataType input, loco::DataType output)
+ : _input_type(input), _output_type(output)
+ {
+ }
+
+ loco::DataType _input_type;
+ loco::DataType _output_type;
+
+ // Requantize input tensors of each node
+ bool visit(luci::CircleNode *node)
+ {
+ LOGGER(l);
+ INFO(l) << "RequantizeConst visit node: " << node->name() << std::endl;
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ auto input_node = node->arg(i);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+
+ // Check if this was quantized (only quantized tensors are requantized)
+ if (circle_node->quantparam() == nullptr)
+ continue;
+
+ // Check if this is already requantized
+ if (circle_node->dtype() == _output_type)
+ continue;
+
+ // Check if this is const (only const is requantized in this function)
+ auto circle_const = dynamic_cast<CircleConst *>(circle_node);
+ if (circle_const == nullptr)
+ continue;
+
+ // Check if this is not bias
+ // bias is not requantized when int8 -> uint8
+ if (is_bias(circle_const))
+ continue;
+
+ if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8)
+ requant_const_int8_to_uint8(circle_const);
+ }
+ return false;
+ }
+};
+
+} // namespace
+
+bool RequantizePass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "RequantizePass Start" << std::endl;
+
+ // Requantize non-const (activations)
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ RequantizeNonConst rqnc(_input_dtype, _output_dtype);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&rqnc);
+ }
+
+ // Requantize const (including weights, constants)
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ RequantizeConst rqc(_input_dtype, _output_dtype);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&rqc);
+ }
+
+ // Update output dtype
+ auto graph_outputs = g->outputs();
+ for (auto node : loco::output_nodes(g))
+ {
+ auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
+ if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
+ {
+ circle_node->dtype(_output_dtype);
+ auto graph_output = graph_outputs->at(circle_node->index());
+ graph_output->dtype(_output_dtype);
+ }
+ }
+
+ INFO(l) << "RequantizePass End" << std::endl;
+ return false; // one time run
+}
+
+} // namespace luci
return os;
}
-// Call this for CircleAvgPool2D and CircleMaxPool2D only
-template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+loco::TensorShape own_shape(const luci::CircleNode *node)
{
- LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
-
- auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
- assert(ifm_shape.rank() == 4);
-
- uint32_t input_height = ifm_shape.dim(1).value();
- uint32_t input_width = ifm_shape.dim(2).value();
- uint32_t stride_height = node->stride()->h();
- uint32_t stride_width = node->stride()->w();
- uint32_t window_height = node->filter()->h();
- uint32_t window_width = node->filter()->w();
- uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
- uint32_t dilation_width = 1;
- uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
- uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
-
- uint32_t output_height = 0;
- uint32_t output_width = 0;
-
- if (node->padding() == luci::Padding::VALID)
- {
- output_height = (input_height + stride_height - effective_window_height) / stride_height;
- output_width = (input_width + stride_width - effective_window_width) / stride_width;
- }
- else if (node->padding() == luci::Padding::SAME)
- {
- output_height = (input_height + stride_height - 1) / stride_height;
- output_width = (input_width + stride_width - 1) / stride_width;
- }
- else
- LUCI_ASSERT(false, "Wrong padding type");
-
- loco::TensorShape ofm_shape;
- ofm_shape.rank(4);
- ofm_shape.dim(0) = ifm_shape.dim(0);
- ofm_shape.dim(1) = output_height;
- ofm_shape.dim(2) = output_width;
- ofm_shape.dim(3) = ifm_shape.dim(3);
+ loco::TensorShape shape;
+ shape.rank(node->rank());
+ for (uint32_t r = 0; r < node->rank(); ++r)
+ shape.dim(r) = loco::Dimension(node->dim(r).value());
+ return shape;
+}
- return loco::NodeShape{ofm_shape};
+loco::NodeShape use_own(const luci::CircleNode *node)
+{
+ loco::TensorShape shape = own_shape(node);
+ return loco::NodeShape{shape};
}
/**
return output_shape;
}
-// BatchMatMulV2 supports broadcasting in the batch dimensions(BatchMatMul doesn't)
-// TODO Distinguish BatchMatMul and BatchMatMulV2
-loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
- const loco::TensorShape &y_shape, bool adj_x, bool adj_y)
-{
- uint32_t x_rank = x_shape.rank();
- uint32_t y_rank = y_shape.rank();
- assert(x_rank >= 2 && y_rank >= 2);
-
- loco::TensorShape output_shape;
- output_shape.rank(x_shape.rank());
- // Braodcast in the batch dimension
- if (x_rank > 2 || y_rank > 2)
- {
- loco::TensorShape dummy_x = x_shape;
- loco::TensorShape dummy_y = y_shape;
- expand_rank(dummy_x, dummy_y);
- if (x_rank < y_rank)
- expand_rank(output_shape, dummy_y);
-
- for (uint32_t d = 0; d < output_shape.rank() - 2; d++)
- {
- uint32_t max_dim = std::max(dummy_x.dim(d).value(), dummy_y.dim(d).value());
- if (dummy_x.dim(d) == dummy_y.dim(d) ||
- dummy_x.dim(d).value() * dummy_y.dim(d).value() == max_dim)
- output_shape.dim(d).set(max_dim);
- else
- INTERNAL_EXN("BatchMatMul has wrong shape");
- }
- }
-
- loco::Dimension x_lhs = adj_x ? x_shape.dim(x_rank - 1) : x_shape.dim(x_rank - 2);
- loco::Dimension x_rhs = adj_x ? x_shape.dim(x_rank - 2) : x_shape.dim(x_rank - 1);
- loco::Dimension y_lhs = adj_y ? y_shape.dim(y_rank - 1) : y_shape.dim(y_rank - 2);
- loco::Dimension y_rhs = adj_y ? y_shape.dim(y_rank - 2) : y_shape.dim(y_rank - 1);
-
- if (not(x_rhs == y_lhs))
- INTERNAL_EXN("x_rhs and y_lhs should be same");
-
- uint32_t out_rank = output_shape.rank();
- output_shape.dim(out_rank - 2) = x_lhs;
- output_shape.dim(out_rank - 1) = y_rhs;
-
- return loco::NodeShape{output_shape};
-}
-
-loco::TensorShape own_shape(const luci::CircleNode *node)
-{
- loco::TensorShape shape;
- shape.rank(node->rank());
- for (uint32_t r = 0; r < node->rank(); ++r)
- shape.dim(r) = loco::Dimension(node->dim(r).value());
- return shape;
-}
-
-loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indices, bool keep_dims)
-{
- const loco::DataType S32 = loco::DataType::S32;
-
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
- auto reduction_indices = loco::must_cast<const luci::CircleConst *>(indices);
-
- { // Exceptions
- // TODO support non-const case
- // TODO support other data type
- LUCI_ASSERT(reduction_indices->dtype() == S32, "Only support int 32");
- }
-
- std::vector<int32_t> reduction_values;
-
- for (uint32_t i = 0; i < reduction_indices->size<S32>(); ++i)
- {
- int32_t axis = reduction_indices->at<S32>(i);
- if (axis < 0)
- axis += input_shape.rank();
- if (not(0 <= axis and axis < static_cast<int32_t>(input_shape.rank())))
- INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
- reduction_values.push_back(axis);
- }
-
- loco::TensorShape output_shape;
-
- if (keep_dims)
- {
- output_shape.rank(input_shape.rank());
- for (uint32_t i = 0; i < input_shape.rank(); ++i)
- output_shape.dim(i) = input_shape.dim(i);
- for (uint32_t i = 0; i < reduction_values.size(); ++i)
- output_shape.dim(reduction_values.at(i)) = 1;
- }
- else
- {
- std::vector<bool> check_reduce(input_shape.rank(), false);
- for (uint32_t i = 0; i < reduction_values.size(); ++i)
- check_reduce.at(reduction_values.at(i)) = true;
-
- uint32_t reduce_cnt = 0;
- for (uint32_t i = 0; i < check_reduce.size(); ++i)
- if (check_reduce.at(i))
- ++reduce_cnt;
-
- output_shape.rank(input_shape.rank() - reduce_cnt);
- for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
- if (check_reduce.at(i) == false)
- output_shape.dim(j++) = input_shape.dim(i);
- }
-
- return output_shape;
-}
-
/**
* @brief vector_from_constant will return int64_t vector from CircleConst node
*/
return loco::NodeShape{shape};
}
-loco::NodeShape use_own(const luci::CircleNode *node)
+template <class CIRCLENODE>
+loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst *paddings)
{
- loco::TensorShape shape = own_shape(node);
- return loco::NodeShape{shape};
-}
+ const loco::DataType S32 = loco::DataType::S32;
-/**
- * @brief Class to infer the shape of CircleNode
- *
- * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
- */
-class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
-{
-public:
- loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); }
+ auto input_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
- loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); }
+ // TODO support other data type
+ LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
+ LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
- loco::NodeShape visit(const luci::CircleAddN *node) final
- {
- auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
+ int32_t n = paddings->dim(0).value();
+ int32_t v = paddings->dim(1).value();
- for (uint32_t idx = 1; idx < node->arity(); ++idx)
- {
- auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
- if (!(shape == shape_idx))
- {
- INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
- }
- }
+ LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
+ LUCI_ASSERT(n == int32_t(input_shape.rank()),
+ "paddings [n, 2] should have same value of input rank");
- return loco::NodeShape{shape};
- }
+ loco::TensorShape output_shape;
- loco::NodeShape visit(const luci::CircleArgMax *node) final
+ output_shape.rank(input_shape.rank());
+ for (int32_t ni = 0; ni < n; ++ni)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
-
- int64_t select_axis = 0;
- {
- LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
-
- // Only support node's shape() is CircleConst with S32/S64
- // Support S32 for now.
- auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
- LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMax");
+ int32_t idx = ni * 2;
+ int value = input_shape.dim(ni).value();
+ value += paddings->at<S32>(idx + 0); // left
+ value += paddings->at<S32>(idx + 1); // right
+ output_shape.dim(ni) = value;
+ }
- if (const_shape_node->rank() > 1)
- INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
- oops::to_uint32(const_shape_node->rank()));
+ return loco::NodeShape{output_shape};
+}
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
- }
- assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
+loco::NodeShape infer_add_n(const luci::CircleAddN *node)
+{
+ auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
- // NOTE select_axis is removed
- loco::TensorShape shape_output;
- uint32_t rank = input_shape.rank();
- uint32_t shrink = static_cast<uint32_t>(select_axis);
- assert(rank > 0);
- shape_output.rank(rank - 1);
- for (uint32_t r = 0, d = 0; r < rank; ++r)
+ for (uint32_t idx = 1; idx < node->arity(); ++idx)
+ {
+ auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
+ if (!(shape == shape_idx))
{
- if (r == shrink)
- continue;
- shape_output.dim(d++) = input_shape.dim(r);
+ INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
}
- return loco::NodeShape{shape_output};
}
+ return loco::NodeShape{shape};
+}
- loco::NodeShape visit(const luci::CircleArgMin *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
-
- int64_t select_axis = 0;
- {
- LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
+loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
- // Only support node's shape() is CircleConst with S32/S64
- // Support S32 for now.
- auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
- LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMin");
+ int64_t select_axis = 0;
+ {
+ LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
- if (const_shape_node->rank() > 1)
- INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
- oops::to_uint32(const_shape_node->rank()));
+ // Only support the case where node's dimension() is a CircleConst with S32/S64
+ // dtype. Support S32 for now.
+ auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
+ LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
+ "Only support int32 CircleConst for CircleArgMax");
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
- }
- assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
+ if (const_shape_node->rank() > 1)
+ INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
+ oops::to_uint32(const_shape_node->rank()));
- // NOTE select_axis is removed
- loco::TensorShape shape_output;
- uint32_t rank = input_shape.rank();
- uint32_t shrink = static_cast<uint32_t>(select_axis);
- assert(rank > 0);
- shape_output.rank(rank - 1);
- for (uint32_t r = 0, d = 0; r < rank; ++r)
- {
- if (r == shrink)
- continue;
- shape_output.dim(d++) = input_shape.dim(r);
- }
- return loco::NodeShape{shape_output};
+ select_axis = const_shape_node->scalar<loco::DataType::S32>();
}
+ assert(select_axis < input_shape.rank());
+ assert(select_axis >= 0); // TODO support negative axis; handling it currently breaks
- loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
+ // NOTE select_axis is removed
+ loco::TensorShape shape_output;
+ uint32_t rank = input_shape.rank();
+ uint32_t shrink = static_cast<uint32_t>(select_axis);
+ assert(rank > 0);
+ shape_output.rank(rank - 1);
+ for (uint32_t r = 0, d = 0; r < rank; ++r)
{
- return infer_pool_2d_shape(node);
+ if (r == shrink)
+ continue;
+ shape_output.dim(d++) = input_shape.dim(r);
}
+ return loco::NodeShape{shape_output};
+}
- loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
- {
- auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
- auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
-
- return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
- }
+loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
- loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
+ int64_t select_axis = 0;
{
- const loco::DataType S32 = loco::DataType::S32;
-
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- // Support only input rank is 3 and 4
- assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+ LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
- // Only support block_shape() with S32 type CircleConst for now
- auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
- LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32,
- "Only support int32 block_shape");
+ // Only support the case where node's dimension() is a CircleConst with S32/S64
+ // dtype. Support S32 for now.
+ auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
+ LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
+ "Only support int32 CircleConst for CircleArgMin");
- // Only support crops() with S32 type CircleConst for now
- auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
- LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
+ if (const_shape_node->rank() > 1)
+ INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
+ oops::to_uint32(const_shape_node->rank()));
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
- assert(const_block_shape_shape.rank() == 1);
- assert(const_crops_shape.rank() == 2);
+ select_axis = const_shape_node->scalar<loco::DataType::S32>();
+ }
+ assert(select_axis < input_shape.rank());
+ assert(select_axis >= 0); // TODO support negative axis; handling it currently breaks
- int32_t input_spatial_dim = input_shape.rank() - 2;
- assert(const_block_shape_shape.dim(0) == input_spatial_dim);
- assert(const_crops_shape.dim(0) == input_spatial_dim);
- assert(const_crops_shape.dim(1) == 2);
+ // NOTE select_axis is removed
+ loco::TensorShape shape_output;
+ uint32_t rank = input_shape.rank();
+ uint32_t shrink = static_cast<uint32_t>(select_axis);
+ assert(rank > 0);
+ shape_output.rank(rank - 1);
+ for (uint32_t r = 0, d = 0; r < rank; ++r)
+ {
+ if (r == shrink)
+ continue;
+ shape_output.dim(d++) = input_shape.dim(r);
+ }
+ return loco::NodeShape{shape_output};
+}
- loco::TensorShape shape_output;
+// Call this for CircleAvgPool2D and CircleMaxPool2D only
+template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+{
+ LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
- shape_output.rank(input_shape.rank());
+ auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
+ assert(ifm_shape.rank() == 4);
- int32_t output_batch_size = input_shape.dim(0).value();
- for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
- {
- int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
- dim_size -= const_crops->at<S32>(dim * 2);
- dim_size -= const_crops->at<S32>(dim * 2 + 1);
- shape_output.dim(dim + 1) = dim_size;
+ uint32_t input_height = ifm_shape.dim(1).value();
+ uint32_t input_width = ifm_shape.dim(2).value();
+ uint32_t stride_height = node->stride()->h();
+ uint32_t stride_width = node->stride()->w();
+ uint32_t window_height = node->filter()->h();
+ uint32_t window_width = node->filter()->w();
+ uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
+ uint32_t dilation_width = 1;
+ uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
+ uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
- assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
- output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
- }
- shape_output.dim(0) = output_batch_size;
- shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
- return loco::NodeShape{shape_output};
+ if (node->padding() == luci::Padding::VALID)
+ {
+ output_height = (input_height + stride_height - effective_window_height) / stride_height;
+ output_width = (input_width + stride_width - effective_window_width) / stride_width;
}
+ else if (node->padding() == luci::Padding::SAME)
+ {
+ output_height = (input_height + stride_height - 1) / stride_height;
+ output_width = (input_width + stride_width - 1) / stride_width;
+ }
+ else
+ LUCI_ASSERT(false, "Wrong padding type");
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = output_height;
+ ofm_shape.dim(2) = output_width;
+ ofm_shape.dim(3) = ifm_shape.dim(3);
+
+ return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_batch_to_space_nd(const luci::CircleBatchToSpaceND *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ // Support only input rank is 3 and 4
+ assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+
+ // Only support block_shape() with S32 type CircleConst for now
+ auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
+ LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32, "Only support int32 block_shape");
+
+ // Only support crops() with S32 type CircleConst for now
+ auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
+ LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
+
+ auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
+ assert(const_block_shape_shape.rank() == 1);
+ assert(const_crops_shape.rank() == 2);
+
+ int32_t input_spatial_dim = input_shape.rank() - 2;
+ assert(const_block_shape_shape.dim(0) == input_spatial_dim);
+ assert(const_crops_shape.dim(0) == input_spatial_dim);
+ assert(const_crops_shape.dim(1) == 2);
+
+ loco::TensorShape shape_output;
+
+ shape_output.rank(input_shape.rank());
+
+ int32_t output_batch_size = input_shape.dim(0).value();
+ for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
+ {
+ int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
+ dim_size -= const_crops->at<S32>(dim * 2);
+ dim_size -= const_crops->at<S32>(dim * 2 + 1);
+ shape_output.dim(dim + 1) = dim_size;
+
+ assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
+ output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
+ }
+ shape_output.dim(0) = output_batch_size;
+ shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+
+ return loco::NodeShape{shape_output};
+}
+
+struct OutputSize
+{
+ uint32_t height = 0;
+ uint32_t width = 0;
+};
+
+template <class Conv2DType> OutputSize infer_conv2d_type(const Conv2DType *node)
+{
+ auto ifm_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
+ auto ker_shape = loco::shape_get(node->filter()).template as<loco::TensorShape>();
+ assert(ifm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+
+ uint32_t input_height = ifm_shape.dim(1).value();
+ uint32_t input_width = ifm_shape.dim(2).value();
+ uint32_t stride_height = node->stride()->h();
+ uint32_t stride_width = node->stride()->w();
+ uint32_t ker_height = ker_shape.dim(1).value();
+ uint32_t ker_width = ker_shape.dim(2).value();
+ uint32_t dilation_height = node->dilation()->h();
+ uint32_t dilation_width = node->dilation()->w();
+ uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
+ uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
+
+ if (node->padding() == luci::Padding::VALID)
+ {
+ output_height = (input_height + stride_height - effective_ker_height) / stride_height;
+ output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+ }
+ else if (node->padding() == luci::Padding::SAME)
+ {
+ output_height = (input_height + stride_height - 1) / stride_height;
+ output_width = (input_width + stride_width - 1) / stride_width;
+ }
+ else
+ LUCI_ASSERT(false, "Wrong padding type");
+
+ OutputSize os{output_height, output_width};
+
+ return os;
+}
+
+// BatchMatMulV2 supports broadcasting in the batch dimensions(BatchMatMul doesn't)
+// TODO Distinguish BatchMatMul and BatchMatMulV2
+loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
+ const loco::TensorShape &y_shape, bool adj_x, bool adj_y)
+{
+ uint32_t x_rank = x_shape.rank();
+ uint32_t y_rank = y_shape.rank();
+ assert(x_rank >= 2 && y_rank >= 2);
+
+ loco::TensorShape output_shape;
+ output_shape.rank(x_shape.rank());
+ // Broadcast in the batch dimension
+ if (x_rank > 2 || y_rank > 2)
+ {
+ loco::TensorShape dummy_x = x_shape;
+ loco::TensorShape dummy_y = y_shape;
+ expand_rank(dummy_x, dummy_y);
+ if (x_rank < y_rank)
+ expand_rank(output_shape, dummy_y);
+
+ for (uint32_t d = 0; d < output_shape.rank() - 2; d++)
+ {
+ uint32_t max_dim = std::max(dummy_x.dim(d).value(), dummy_y.dim(d).value());
+ if (dummy_x.dim(d) == dummy_y.dim(d) ||
+ dummy_x.dim(d).value() * dummy_y.dim(d).value() == max_dim)
+ output_shape.dim(d).set(max_dim);
+ else
+ INTERNAL_EXN("BatchMatMul has wrong shape");
+ }
+ }
+
+ loco::Dimension x_lhs = adj_x ? x_shape.dim(x_rank - 1) : x_shape.dim(x_rank - 2);
+ loco::Dimension x_rhs = adj_x ? x_shape.dim(x_rank - 2) : x_shape.dim(x_rank - 1);
+ loco::Dimension y_lhs = adj_y ? y_shape.dim(y_rank - 1) : y_shape.dim(y_rank - 2);
+ loco::Dimension y_rhs = adj_y ? y_shape.dim(y_rank - 2) : y_shape.dim(y_rank - 1);
+
+ if (not(x_rhs == y_lhs))
+ INTERNAL_EXN("x_rhs and y_lhs should be same");
+
+ uint32_t out_rank = output_shape.rank();
+ output_shape.dim(out_rank - 2) = x_lhs;
+ output_shape.dim(out_rank - 1) = y_rhs;
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_concatenation(const luci::CircleConcatenation *node)
+{
+ // TODO Support when CircleConcatenation has 0 input
+ assert(node->numValues() > 0);
+
+ auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ auto axis = node->axis();
+ if (axis < 0)
+ axis += first_shape.rank();
+
+ assert(0 <= axis);
+ assert(first_shape.rank() > static_cast<uint32_t>(axis));
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(first_shape.rank());
+ for (uint32_t i = 0; i < output_shape.rank(); ++i)
+ output_shape.dim(i) = first_shape.dim(i);
+
+ for (uint32_t i = 1; i < node->numValues(); ++i)
+ {
+ auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+
+ for (uint32_t j = 0; j < output_shape.rank(); ++j)
+ {
+ if (j == static_cast<uint32_t>(axis))
+ output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
+ else
+ assert(output_shape.dim(j) == input_shape.dim(j));
+ }
+ }
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_conv2d(const luci::CircleConv2D *node)
+{
+ LOGGER(l);
+
+ auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+
+ INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" << ker_shape.rank()
+ << ")" << std::endl;
+
+ assert(ifm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+ assert(ifm_shape.dim(3) == ker_shape.dim(3));
+
+ auto os = infer_conv2d_type(node);
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = os.height;
+ ofm_shape.dim(2) = os.width;
+ ofm_shape.dim(3) = ker_shape.dim(0);
+
+ return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_depth_to_space(const luci::CircleDepthToSpace *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+
+ // Only data format NHWC is supported
+ // TODO need to clarify what to do with layout in this operator
+ int32_t height = input_shape.dim(1).value();
+ int32_t width = input_shape.dim(2).value();
+ int32_t depth = input_shape.dim(3).value();
+
+ int block_size = node->block_size();
+
+ if (block_size < 2)
+ INTERNAL_EXN("Block size must be >= 2");
+
+ if (depth % (block_size * block_size))
+ {
+ INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2");
+ }
+
+ loco::TensorShape output_shape;
+ output_shape.rank(4);
+
+ output_shape.dim(0) = input_shape.dim(0).value();
+ output_shape.dim(1) = height * block_size;
+ output_shape.dim(2) = width * block_size;
+ output_shape.dim(3) = depth / (block_size * block_size);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_depthwise_conv2d(const luci::CircleDepthwiseConv2D *node)
+{
+ auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+
+ assert(ifm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+ assert(ker_shape.dim(0).value() == 1);
+
+ auto os = infer_conv2d_type(node);
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = os.height;
+ ofm_shape.dim(2) = os.width;
+ ofm_shape.dim(3) = ker_shape.dim(3);
+
+ return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_expand_dims(const luci::CircleExpandDims *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+ auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ if (x_shape.rank() == 0)
+ {
+ // This may be due to an unknown shape. We use the shape from the node itself.
+ return use_own(node);
+ }
+ auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis());
+ LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis");
+ if (const_axis->rank() != 0 && const_axis->rank() != 1)
+ {
+ INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum());
+ }
+ int32_t axis = const_axis->at<S32>(0);
+ LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
+ (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
+ "Axis has to be between [-(D+1), D], where D is rank of input.");
+ size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
+ loco::TensorShape output_shape;
+ output_shape.rank(x_shape.rank() + 1);
+ size_t i = 0;
+ for (; i < positive_axis; i++)
+ output_shape.dim(i) = x_shape.dim(i);
+ output_shape.dim(i) = loco::Dimension(1);
+ for (; i < x_shape.rank(); i++)
+ output_shape.dim(i + 1) = x_shape.dim(i);
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_fill(const luci::CircleFill *node)
+{
+ loco::TensorShape shape;
+ {
+ LUCI_ASSERT(node->dims(), "dims input should not be nullptr");
+
+ auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims());
+ if (dims_node != nullptr)
+ {
+ // Only support node with S32
+ LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst");
+
+ if (dims_node->rank() != 1)
+ INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank()));
+
+ shape.rank(dims_node->dim(0).value());
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis);
+ }
+ }
+ else
+ {
+ shape = own_shape(node);
+ }
+ }
+
+ return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+
+ // Checking shape capability for fully connected layer
+ // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
+ // Weight: [# of units, K]
+ // Output: [D1 * D2 * ... * Dn / K, # of units]
+ if (input_shape.rank() < 2 || weights_shape.rank() != 2)
+ {
+ // Return node own shape if shape inference is not possible
+ return use_own(node);
+ }
+
+ uint32_t input_size = 1;
+ for (uint32_t i = 0; i < input_shape.rank(); i++)
+ {
+ input_size = input_size * input_shape.dim(i).value();
+ }
+ const uint32_t batch_size = input_size / weights_shape.dim(1).value();
+ loco::TensorShape out_shape;
+ out_shape.rank(2);
+ out_shape.dim(0) = batch_size;
+ out_shape.dim(1) = weights_shape.dim(0);
+
+ return loco::NodeShape{out_shape};
+}
+
+loco::NodeShape infer_gather(const luci::CircleGather *node)
+{
+ loco::TensorShape output_shape;
+
+ const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
+ const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ int32_t axis = node->axis();
+
+ // If the CircleGather input has a dynamic shape, this shape cannot be inferred,
+ // so the shape the node already has is returned instead.
+ if (input_shape.rank() == 0 || positions_shape.rank() == 0)
+ return use_own(node);
+
+ if (axis < 0)
+ axis += input_shape.rank();
+
+ output_shape.rank(input_shape.rank() - 1 + positions_shape.rank());
+ int32_t outdim_index = 0;
+ for (int32_t i = 0; i < axis; ++i)
+ output_shape.dim(outdim_index++) = input_shape.dim(i);
+ for (uint32_t i = 0; i < positions_shape.rank(); ++i)
+ output_shape.dim(outdim_index++) = positions_shape.dim(i);
+ for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
+ output_shape.dim(outdim_index++) = input_shape.dim(i);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_gather_nd(const luci::CircleGatherNd *node)
+{
+ loco::TensorShape output_shape;
+
+ const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
+ const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+
+ const auto params_rank = params_shape.rank();
+ const auto indices_rank = indices_shape.rank();
+
+ // see https://www.tensorflow.org/api_docs/python/tf/gather_nd
+ // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
+ // batch_dims isn't supported in tflite
+
+ // TODO: replace exceptions with setting shape to unknown?
+
+ if (!indices_shape.dim(indices_rank - 1).known())
+ INTERNAL_EXN("Last indices dimension is unknown");
+
+ auto indices_last_dim = indices_shape.dim(indices_rank - 1).value();
+
+ if (indices_last_dim > params_rank)
+ INTERNAL_EXN("Last indices dimension should be <= params rank");
+
+ const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
+
+ output_shape.rank(output_rank);
+
+ uint32_t output_index = 0;
+ for (uint32_t i = 0; i < indices_rank - 1; ++i)
+ {
+ auto &dim = indices_shape.dim(i);
+ if (!dim.known())
+ INTERNAL_EXN("Unknown indices dimension is unsupported");
+ output_shape.dim(output_index++).set(dim.value());
+ }
+
+ for (uint32_t i = indices_last_dim; i < params_rank; ++i)
+ {
+ auto &dim = params_shape.dim(i);
+ if (!dim.known())
+ INTERNAL_EXN("Unknown params dimension is unsupported");
+ output_shape.dim(output_index++).set(dim.value());
+ }
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_matrix_diag(const luci::CircleMatrixDiag *node)
+{
+ loco::TensorShape output_shape;
+
+ auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+ auto rank = diagonal_shape.rank();
+
+ output_shape.rank(rank + 1);
+
+ for (uint32_t i = 0; i < rank; i++)
+ {
+ output_shape.dim(i) = diagonal_shape.dim(i);
+ }
+
+ output_shape.dim(rank) = diagonal_shape.dim(rank - 1);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_matrix_set_diag(const luci::CircleMatrixSetDiag *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+
+ auto rank = diagonal_shape.rank();
+
+ LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1");
+
+ for (uint32_t i = 0; i < rank - 1; i++)
+ {
+ LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims");
+ }
+
+ auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
+
+ LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error");
+
+ return loco::NodeShape{input_shape};
+}
+
+loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indices, bool keep_dims)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
+ auto reduction_indices = loco::must_cast<const luci::CircleConst *>(indices);
+
+ { // Exceptions
+ // TODO support non-const case
+ // TODO support other data type
+ LUCI_ASSERT(reduction_indices->dtype() == S32, "Only support int 32");
+ }
+
+ std::vector<int32_t> reduction_values;
+
+ for (uint32_t i = 0; i < reduction_indices->size<S32>(); ++i)
+ {
+ int32_t axis = reduction_indices->at<S32>(i);
+ if (axis < 0)
+ axis += input_shape.rank();
+ if (not(0 <= axis and axis < static_cast<int32_t>(input_shape.rank())))
+ INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
+ reduction_values.push_back(axis);
+ }
+
+ loco::TensorShape output_shape;
+
+ if (keep_dims)
+ {
+ output_shape.rank(input_shape.rank());
+ for (uint32_t i = 0; i < input_shape.rank(); ++i)
+ output_shape.dim(i) = input_shape.dim(i);
+ for (uint32_t i = 0; i < reduction_values.size(); ++i)
+ output_shape.dim(reduction_values.at(i)) = 1;
+ }
+ else
+ {
+ std::vector<bool> check_reduce(input_shape.rank(), false);
+ for (uint32_t i = 0; i < reduction_values.size(); ++i)
+ check_reduce.at(reduction_values.at(i)) = true;
+
+ uint32_t reduce_cnt = 0;
+ for (uint32_t i = 0; i < check_reduce.size(); ++i)
+ if (check_reduce.at(i))
+ ++reduce_cnt;
+
+ output_shape.rank(input_shape.rank() - reduce_cnt);
+ for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
+ if (check_reduce.at(i) == false)
+ output_shape.dim(j++) = input_shape.dim(i);
+ }
+
+ return output_shape;
+}
- loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); }
-
- loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }
-
- loco::NodeShape visit(const luci::CircleConcatenation *node) final
+loco::NodeShape infer_mirror_pad(const luci::CircleMirrorPad *node)
+{
+ // TODO support non-const case
+ auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ return use_paddings(node, paddings);
+}
+
+loco::NodeShape infer_one_hot(const luci::CircleOneHot *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+ auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ // Only support OneHot node's depth() is CircleConst with type S32
+ // TODO support depth with other types
+ auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
+ LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst");
+ if (depth->rank() != 0)
+ INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank()));
+ loco::TensorShape output_shape;
+ output_shape.rank(indices_shape.rank() + 1);
+ auto axis = node->axis();
+ if (axis < 0)
+ axis += indices_shape.rank() + 1;
+ LUCI_ASSERT(0 <= axis, "Axis is out of range");
+ LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range");
+ uint32_t j = 0;
+ for (uint32_t i = 0; i < output_shape.rank(); i++)
+ {
+ if (i == static_cast<uint32_t>(axis))
+ {
+ output_shape.dim(i) = depth->at<S32>(0);
+ }
+ else
+ {
+ output_shape.dim(i) = indices_shape.dim(j++);
+ }
+ }
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_pack(const luci::CirclePack *node)
+{
+ LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
+
+ auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ // Make sure all inputs have the same shape.
+ for (uint32_t i = 1; i < node->values_count(); ++i)
{
- // TODO Support when CircleConcatenation has 0 input
- assert(node->numValues() > 0);
+ auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
+ "All inputs must have the same shape");
+ }
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
- auto axis = node->axis();
- if (axis < 0)
- axis += first_shape.rank();
+ // Checking shape capability for pack layer
+ // Input: tensors [D1, D2, ... Dn]
+ // Axis: K
+ // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
+ auto axis = node->axis();
+ if (axis < 0)
+ axis += first_shape.rank() + 1;
- assert(0 <= axis);
- assert(first_shape.rank() > static_cast<uint32_t>(axis));
+ LUCI_ASSERT(0 <= axis, "Axis is out of range");
+ LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
- loco::TensorShape output_shape;
+ loco::TensorShape output_shape;
+ output_shape.rank(first_shape.rank() + 1);
+
+ uint32_t j = 0;
+ for (uint32_t i = 0; i < output_shape.rank(); ++i)
+ {
+ if (i == static_cast<uint32_t>(axis))
+ {
+ output_shape.dim(i) = node->values_count();
+ }
+ else
+ {
+ output_shape.dim(i) = first_shape.dim(j++);
+ }
+ }
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_pad(const luci::CirclePad *node)
+{
+ // TODO support non-const case
+ auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ return use_paddings(node, paddings);
+}
+
+loco::NodeShape infer_pad_v2(const luci::CirclePadV2 *node)
+{
+ // TODO support non-const case
+ auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings());
+ if (!paddings)
+ {
+ auto node_shape = own_shape(node);
+ return loco::NodeShape{node_shape};
+ }
+ return use_paddings(node, paddings);
+}
+
+loco::NodeShape infer_p_relu(const luci::CirclePRelu *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(input_shape, alpha_shape);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_range(const luci::CircleRange *node)
+{
+ loco::TensorShape output_shape;
+ output_shape.rank(1);
+
+ auto start_node = dynamic_cast<luci::CircleConst *>(node->start());
+ auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit());
+ auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta());
+
+ if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr)
+ {
+ return use_own(node);
+ }
+
+ double start = 0, limit = 0, delta = 0;
+
+#define GET_RANGE_PARAM(DT) \
+ start = start_node->scalar<DT>(); \
+ limit = limit_node->scalar<DT>(); \
+ delta = delta_node->scalar<DT>();
+
+ switch (start_node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ GET_RANGE_PARAM(loco::DataType::FLOAT32)
+ break;
+ case loco::DataType::S32:
+ GET_RANGE_PARAM(loco::DataType::S32)
+ break;
+ default:
+ INTERNAL_EXN("Range data type not supported");
+ }
+
+#undef GET_RANGE_PARAM
+
+ if (delta == 0)
+ INTERNAL_EXN("Delta can not be zero");
+
+ output_shape.dim(0) = ceil((limit - start) / delta);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_reshape(const luci::CircleReshape *node)
+{
+ LOGGER(l);
+
+ const loco::DataType S32 = loco::DataType::S32;
- output_shape.rank(first_shape.rank());
- for (uint32_t i = 0; i < output_shape.rank(); ++i)
- output_shape.dim(i) = first_shape.dim(i);
+ loco::TensorShape shape_by_input;
+ {
+ LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
- for (uint32_t i = 1; i < node->numValues(); ++i)
+ // Only support node's shape() is CircleConst with S32
+ // TODO support other node with other types
+ auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
+ if (const_shape_node != nullptr)
{
- auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
- for (uint32_t j = 0; j < output_shape.rank(); ++j)
+ shape_by_input.rank(const_shape_node->size<S32>());
+
+ for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
{
- if (j == static_cast<uint32_t>(axis))
- output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
- else
- assert(output_shape.dim(j) == input_shape.dim(j));
+ shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
}
}
+ else
+ {
+ // We use shape from the node itself
+ shape_by_input = own_shape(node);
+ }
+ }
- return loco::NodeShape{output_shape};
+ loco::TensorShape shape_by_attr;
+ {
+ shape_by_attr.rank(node->newShape()->rank());
+
+ for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
+ {
+ shape_by_attr.dim(axis) = node->newShape()->dim(axis);
+ }
}
- loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }
+ if (!(shape_by_input == shape_by_attr))
+ {
+ INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl;
+ INFO(l) << " shape_by_input : " << shape_by_input << std::endl;
+ INFO(l) << " shape_by_attr : " << shape_by_attr << std::endl;
+ }
+
+ loco::TensorShape output_shape = shape_by_input;
- loco::NodeShape visit(const luci::CircleConv2D *node) final
+ // One of the dimensions can have special value -1, meaning its actual value should be inferred.
+ const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
+ const uint32_t input_element_count = loco::element_count(&input_shape);
+ uint32_t output_element_count = 1;
+ uint32_t unknown_dim_index = UINT32_MAX;
+ for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
{
- LOGGER(l);
+ const uint32_t dim_value = output_shape.dim(dim_index).value();
+ if (static_cast<int>(dim_value) == -1)
+ {
+ LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
+ unknown_dim_index = dim_index;
+ }
+ else
+ {
+ output_element_count *= dim_value;
+ }
+ }
+ if (unknown_dim_index != UINT32_MAX)
+ {
+ output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
+ }
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+ if (input_shape.rank() != 4)
+ INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
+
+ auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
+
+ if (const_node->dtype() != loco::DataType::S32)
+ INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
+
+ if (const_node->rank() != 1)
+ INTERNAL_EXN("Expected size tensor of rank 1");
+
+ if (const_node->dim(0).value() != 2)
+ INTERNAL_EXN("Expected size tensor with shape [2]");
+
+ loco::TensorShape output_shape;
+ output_shape.rank(4);
+ output_shape.dim(0) = input_shape.dim(0);
+ output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
+ output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+ output_shape.dim(3) = input_shape.dim(3);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+ if (input_shape.rank() != 4)
+ INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
+
+ auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
+
+ if (const_node->dtype() != loco::DataType::S32)
+ INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
+
+ if (const_node->rank() != 1)
+ INTERNAL_EXN("Expected size tensor of rank 1");
+
+ if (const_node->dim(0).value() != 2)
+ INTERNAL_EXN("Expected size tensor with shape [2]");
+
+ loco::TensorShape output_shape;
+ output_shape.rank(4);
+ output_shape.dim(0) = input_shape.dim(0);
+ output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
+ output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+ output_shape.dim(3) = input_shape.dim(3);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_scatter_nd(const luci::CircleScatterNd *node)
+{
+ loco::TensorShape output_shape;
+
+ auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape());
+
+ const loco::DataType S32 = loco::DataType::S32;
+ const loco::DataType S64 = loco::DataType::S64;
+
+ std::vector<int64_t> vect_shape;
+
+ if (shape_node->dtype() == S32)
+ vect_shape = vector_from_constant<S32>(shape_node);
+ else if (shape_node->dtype() == S64)
+ vect_shape = vector_from_constant<S64>(shape_node);
+ else
+ LUCI_ASSERT(false, "Only support int32/int64 for shape()");
+
+ output_shape.rank(vect_shape.size());
+ for (uint32_t i = 0; i < vect_shape.size(); ++i)
+ output_shape.dim(i) = vect_shape[i];
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_segment_sum(const luci::CircleSegmentSum *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
+
+ LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
+ LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
+ "segment_ids size must be equal to the size of data's first dimension");
+
+ auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids());
+
+ std::vector<int64_t> vect_ids;
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+ if (ids_shape_value->dtype() == loco::DataType::S32)
+ vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);
- INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker("
- << ker_shape.rank() << ")" << std::endl;
+ LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
+ "segment_ids values should be sorted")
- assert(ifm_shape.rank() == 4);
- assert(ker_shape.rank() == 4);
- assert(ifm_shape.dim(3) == ker_shape.dim(3));
+ loco::TensorShape output_shape;
+
+ output_shape.rank(input_shape.rank());
+
+ for (uint32_t i = 1; i < input_shape.rank(); ++i)
+ output_shape.dim(i) = input_shape.dim(i);
+
+ output_shape.dim(0) = vect_ids.back() + 1;
- uint32_t input_height = ifm_shape.dim(1).value();
- uint32_t input_width = ifm_shape.dim(2).value();
- uint32_t stride_height = node->stride()->h();
- uint32_t stride_width = node->stride()->w();
- uint32_t ker_height = ker_shape.dim(1).value();
- uint32_t ker_width = ker_shape.dim(2).value();
- uint32_t dilation_height = node->dilation()->h();
- uint32_t dilation_width = node->dilation()->w();
- uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
- uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_select(const luci::CircleSelect *node)
+{
+ auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
+ assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
- uint32_t output_height = 0;
- uint32_t output_width = 0;
+ // condition shape validation
+ auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+ if (c_shape.rank() != t_shape.rank())
+ {
+ if (c_shape.rank() != 0 && c_shape.rank() != 1)
+ INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());
- if (node->padding() == luci::Padding::VALID)
+ if (c_shape.rank() == 1)
{
- output_height = (input_height + stride_height - effective_ker_height) / stride_height;
- output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+ if (c_shape.dim(0).value() != t_shape.dim(0).value())
+ INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)");
}
- else if (node->padding() == luci::Padding::SAME)
+ }
+
+ return loco::NodeShape{t_shape};
+}
+
+loco::NodeShape infer_select_v2(const luci::CircleSelectV2 *node)
+{
+ auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+ auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
+ auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
+
+ // validate ability to broadcast shapes to each other
+ auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
+ return loco::NodeShape{b_shape};
+}
+
+loco::NodeShape infer_shape(const luci::CircleShape *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(1);
+ output_shape.dim(0) = input_shape.rank();
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_slice(const luci::CircleSlice *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+ const loco::DataType S64 = loco::DataType::S64;
+
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+ auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
+ auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
+
+ loco::TensorShape output_shape;
+ std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t
+ std::vector<int64_t> vect_size;
+
+ if (const_begin->dtype() == S32)
+ vect_begin = vector_from_constant<S32>(const_begin);
+ else if (const_begin->dtype() == S64)
+ vect_begin = vector_from_constant<S64>(const_begin);
+ else
+ LUCI_ASSERT(false, "Only support int32/int64 for begin()");
+
+ if (const_size->dtype() == S32)
+ vect_size = vector_from_constant<S32>(const_size);
+ else if (const_size->dtype() == S64)
+ vect_size = vector_from_constant<S64>(const_size);
+ else
+ LUCI_ASSERT(false, "Only support int32/int64 for size()");
+
+ assert(input_shape.rank() == vect_begin.size());
+ assert(input_shape.rank() == vect_size.size());
+
+ output_shape.rank(vect_begin.size());
+ for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
+ {
+ auto size = vect_size.at(idx);
+ if (size == -1)
{
- output_height = (input_height + stride_height - 1) / stride_height;
- output_width = (input_width + stride_width - 1) / stride_width;
+ size = input_shape.dim(idx).value() - vect_begin.at(idx);
}
- else
- LUCI_ASSERT(false, "Wrong padding type");
+ output_shape.dim(idx) = size;
+ }
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_space_to_batch_nd(const luci::CircleSpaceToBatchND *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  // Only input ranks 3 and 4 are supported
+ assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+
+ // Only support block_shape() with S32 type CircleConst for now
+ auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
+ LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape");
- loco::TensorShape ofm_shape;
- ofm_shape.rank(4);
- ofm_shape.dim(0) = ifm_shape.dim(0);
- ofm_shape.dim(1) = output_height;
- ofm_shape.dim(2) = output_width;
- ofm_shape.dim(3) = ker_shape.dim(0);
+ // Only support paddings() with S32 type CircleConst for now
+ auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
- return loco::NodeShape{ofm_shape};
+ auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
+ assert(const_block_shape_shape.rank() == 1);
+ assert(const_paddings_shape.rank() == 2);
+
+ int32_t input_spatial_dim = input_shape.rank() - 2;
+ assert(const_block_shape_shape.dim(0) == input_spatial_dim);
+ assert(const_paddings_shape.dim(0) == input_spatial_dim);
+ assert(const_paddings_shape.dim(1) == 2);
+
+ // Check all values of block_shape >= 1
+ uint32_t ele_count = const_block_shape->size<S32>();
+ for (uint32_t e = 0; e < ele_count; ++e)
+ {
+ auto val = const_block_shape->at<S32>(e);
+ if (val < 1)
+ {
+ INTERNAL_EXN_V("All values of block_shape >= 1: ", e);
+ }
}
- loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); }
+ loco::TensorShape shape_output;
- loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+ shape_output.rank(input_shape.rank());
- loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
+ int32_t output_batch_size = input_shape.dim(0).value();
+ for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+ int dim_size = input_shape.dim(dim + 1).value();
+ dim_size += const_paddings->at<S32>(dim * 2);
+ dim_size += const_paddings->at<S32>(dim * 2 + 1);
+ shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim);
+
+ assert(dim_size % const_block_shape->at<S32>(dim) == 0);
+ output_batch_size = output_batch_size * const_block_shape->at<S32>(dim);
+ }
+ shape_output.dim(0) = output_batch_size;
+ shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+
+ return loco::NodeShape{shape_output};
+}
+
+loco::NodeShape infer_space_to_depth(const luci::CircleSpaceToDepth *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+
+ // Only data format NHWC is supported
+ int32_t height = input_shape.dim(1).value();
+ int32_t width = input_shape.dim(2).value();
+ int32_t depth = input_shape.dim(3).value();
- // Only data format NHWC is supported
- // TODO need to clarify what to do with layout in this operator
- int32_t height = input_shape.dim(1).value();
- int32_t width = input_shape.dim(2).value();
- int32_t depth = input_shape.dim(3).value();
+ int block_size = node->block_size();
+
+ if (block_size < 2)
+ INTERNAL_EXN("Block size must be >= 2");
+
+ if ((height % block_size) || (width % block_size))
+ {
+ INTERNAL_EXN("The input tensor's height and width must be divisible by block_size");
+ }
+
+ loco::TensorShape output_shape;
+ output_shape.rank(4);
+
+ output_shape.dim(0) = input_shape.dim(0).value();
+ output_shape.dim(1) = height / block_size;
+ output_shape.dim(2) = width / block_size;
+ output_shape.dim(3) = block_size * block_size * depth;
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_sparse_to_dense(const luci::CircleSparseToDense *node)
+{
+ loco::TensorShape shape;
+ {
+ LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr");
+
+ auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
+ if (output_shape_node != nullptr)
+ {
+ // Only support node with S32
+ LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
+ "Only support int32 CircleConst");
- int block_size = node->block_size();
+ if (output_shape_node->rank() != 1)
+ INTERNAL_EXN_V("Only support rank 1 CircleConst",
+ oops::to_uint32(output_shape_node->rank()));
- if (block_size < 2)
- INTERNAL_EXN("Block size must be >= 2");
+ shape.rank(output_shape_node->size<loco::DataType::S32>());
- if (depth % (block_size * block_size))
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+ }
+ }
+ else
{
- INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2");
+ shape = own_shape(node);
}
+ }
- loco::TensorShape output_shape;
- output_shape.rank(4);
+ return loco::NodeShape{shape};
+}
- output_shape.dim(0) = input_shape.dim(0).value();
- output_shape.dim(1) = height * block_size;
- output_shape.dim(2) = width * block_size;
- output_shape.dim(3) = depth / (block_size * block_size);
+loco::NodeShape infer_strided_slice(const luci::CircleStridedSlice *node)
+{
+ auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin());
+ auto end_node = dynamic_cast<luci::CircleConst *>(node->end());
+ auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides());
- return loco::NodeShape{output_shape};
+ if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr)
+ {
+ return use_own(node);
}
- loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
+ loco::TensorShape shape = infer_output_shape(node);
+ return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+ // TODO input shape may be unknown before runtime
+ std::vector<bool> do_squeeze(input_shape.rank(), false);
+ uint32_t num_squeezed = 0;
+
+ if (!node->squeeze_dims().empty())
{
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+ // SqueezeDims not empty, squeeze only dims specified
+ for (int32_t raw_dim : node->squeeze_dims())
+ {
+ int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
- assert(ifm_shape.rank() == 4);
- assert(ker_shape.rank() == 4);
- assert(ker_shape.dim(0).value() == 1);
+ if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() ||
+ input_shape.dim(dim).value() != 1)
+ {
+ INTERNAL_EXN("invalid dimention specified to Squeeze");
+ }
- uint32_t input_height = ifm_shape.dim(1).value();
- uint32_t input_width = ifm_shape.dim(2).value();
- uint32_t stride_height = node->stride()->h();
- uint32_t stride_width = node->stride()->w();
- uint32_t ker_height = ker_shape.dim(1).value();
- uint32_t ker_width = ker_shape.dim(2).value();
- uint32_t dilation_height = node->dilation()->h();
- uint32_t dilation_width = node->dilation()->w();
- uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
- uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+ if (!do_squeeze[dim])
+ ++num_squeezed;
+ do_squeeze[dim] = true;
+ }
+ }
+ else
+ {
+ // SqueezeDims empty, squeeze any dims with size == 1
+ for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
+ {
+ if (input_shape.dim(dim) == 1)
+ {
+ do_squeeze[dim] = true;
+ ++num_squeezed;
+ }
+ }
+ }
- uint32_t output_height = 0;
- uint32_t output_width = 0;
+ loco::TensorShape output_shape;
+ output_shape.rank(input_shape.rank() - num_squeezed);
- if (node->padding() == luci::Padding::VALID)
- {
- output_height = (input_height + stride_height - effective_ker_height) / stride_height;
- output_width = (input_width + stride_width - effective_ker_width) / stride_width;
- }
- else if (node->padding() == luci::Padding::SAME)
+ for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
+ {
+ if (!do_squeeze[in_dim])
{
- output_height = (input_height + stride_height - 1) / stride_height;
- output_width = (input_width + stride_width - 1) / stride_width;
+ output_shape.dim(out_dim++) = input_shape.dim(in_dim);
}
- else
- LUCI_ASSERT(false, "Wrong padding type");
+ }
- loco::TensorShape ofm_shape;
- ofm_shape.rank(4);
- ofm_shape.dim(0) = ifm_shape.dim(0);
- ofm_shape.dim(1) = output_height;
- ofm_shape.dim(2) = output_width;
- ofm_shape.dim(3) = ker_shape.dim(3);
+ return loco::NodeShape{output_shape};
+}
- return loco::NodeShape{ofm_shape};
- }
+loco::NodeShape infer_tile(const luci::CircleTile *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
- loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
- loco::NodeShape visit(const luci::CircleElu *node) final
- {
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ // TODO support non-const case
+ // TODO support S64 type
+ LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples");
+ LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1")
- return loco::NodeShape{input_shape};
- }
+ uint32_t n = multiples->dim(0).value();
- loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); }
+ LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank");
- loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); }
+ loco::TensorShape output_shape;
- loco::NodeShape visit(const luci::CircleExpandDims *node) final
+ output_shape.rank(input_shape.rank());
+ for (uint32_t ni = 0; ni < n; ++ni)
{
- const loco::DataType S32 = loco::DataType::S32;
- auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- if (x_shape.rank() == 0)
- {
- // This maybe for unknown shape. We use shape from the node itself.
- return use_own(node);
- }
- auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis());
- LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis");
- if (const_axis->rank() != 0 && const_axis->rank() != 1)
- {
- INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum());
- }
- int32_t axis = const_axis->at<S32>(0);
- LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
- (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
- "Axis has to be between [-(D+1), D], where D is rank of input.");
- size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
- loco::TensorShape output_shape;
- output_shape.rank(x_shape.rank() + 1);
- size_t i = 0;
- for (; i < positive_axis; i++)
- output_shape.dim(i) = x_shape.dim(i);
- output_shape.dim(i) = loco::Dimension(1);
- for (; i < x_shape.rank(); i++)
- output_shape.dim(i + 1) = x_shape.dim(i);
- return loco::NodeShape{output_shape};
+ int32_t multiple = multiples->at<S32>(ni);
+ output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple);
}
- loco::NodeShape visit(const luci::CircleFill *node) final
- {
- loco::TensorShape shape;
- {
- LUCI_ASSERT(node->dims(), "dims input should not be nullptr");
+ return loco::NodeShape{output_shape};
+}
- auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims());
- if (dims_node != nullptr)
- {
- // Only support node with S32
- LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst");
+loco::NodeShape infer_transpose(const luci::CircleTranspose *node)
+{
+ auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
- if (dims_node->rank() != 1)
- INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank()));
+ auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
- shape.rank(dims_node->dim(0).value());
+ loco::TensorShape output_shape;
+ output_shape.rank(input_shape.rank());
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis);
- }
- }
- else
- {
- shape = own_shape(node);
- }
- }
+ assert(perm_node->dtype() == loco::DataType::S32);
+ assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
- return loco::NodeShape{shape};
+ for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
+ {
+ auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
+ output_shape.dim(out_axis) = input_shape.dim(in_axis);
}
- loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
+ return output_shape;
+}
- loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_transpose_conv(const luci::CircleTransposeConv *node)
+{
+ // TransposeConv's output shape is written in its 'inputSizes' argument
+ auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes());
+ // TODO support non-const type
+ LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
+ LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
+ "Only support rank 1 with 4 entries")
- loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); }
+ loco::TensorShape shape;
- loco::NodeShape visit(const luci::CircleFullyConnected *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+ shape.rank(4);
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
- // Checking shape capability for fully connected layer
- // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
- // Weight: [# of units, K]
- // Output: [D1 * D2 * ... * Dn / K, # of units]
- if (input_shape.rank() < 2 || weights_shape.rank() != 2)
- {
- // Return node own shape if shape inference is not possible
- return use_own(node);
- }
+ return loco::NodeShape{shape};
+}
- uint32_t input_size = 1;
- for (uint32_t i = 0; i < input_shape.rank(); i++)
- {
- input_size = input_size * input_shape.dim(i).value();
- }
- const uint32_t batch_size = input_size / weights_shape.dim(1).value();
- loco::TensorShape out_shape;
- out_shape.rank(2);
- out_shape.dim(0) = batch_size;
- out_shape.dim(1) = weights_shape.dim(0);
+loco::NodeShape infer_unpack(const luci::CircleUnpack *node)
+{
+  // CircleUnpack provides a list (array) of Tensors, each with one less dimension than the input
+ // We'll set shape of CircleUnpack to shape of actual outputs
+ // TODO fix this if any problem rises
+ auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
- return loco::NodeShape{out_shape};
- }
+ auto axis = node->axis();
+ auto num = node->num();
+ auto rank = static_cast<int32_t>(value_shape.rank());
- loco::NodeShape visit(const luci::CircleGather *node) final
+ if (rank == 0)
{
- loco::TensorShape output_shape;
+ // Unknown shape
+ return use_own(node);
+ }
- const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
- int32_t axis = node->axis();
+ LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range");
- // If CircleGather input has a dynamic shape, it can't inference this shape. So, it returns the
- // shape that node already has.
- if (input_shape.rank() == 0 || positions_shape.rank() == 0)
- return use_own(node);
+ if (axis < 0)
+ axis += rank;
- if (axis < 0)
- axis += input_shape.rank();
+ LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()),
+ "num, axis maybe incorrect");
- output_shape.rank(input_shape.rank() - 1 + positions_shape.rank());
- int32_t outdim_index = 0;
- for (int32_t i = 0; i < axis; ++i)
- output_shape.dim(outdim_index++) = input_shape.dim(i);
- for (uint32_t i = 0; i < positions_shape.rank(); ++i)
- output_shape.dim(outdim_index++) = positions_shape.dim(i);
- for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
- output_shape.dim(outdim_index++) = input_shape.dim(i);
+ loco::TensorShape output_shape;
+ output_shape.rank(rank - 1);
- return loco::NodeShape{output_shape};
+ for (int32_t i = 0, o = 0; i < rank; ++i)
+ {
+ if (i != axis)
+ output_shape.dim(o++) = value_shape.dim(i);
}
- loco::NodeShape visit(const luci::CircleGatherNd *node) final
- {
- loco::TensorShape output_shape;
+ return loco::NodeShape{output_shape};
+}
- const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+loco::NodeShape infer_unique(const luci::CircleUnique *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- const auto params_rank = params_shape.rank();
- const auto indices_rank = indices_shape.rank();
+ assert(input_shape.rank() == 1);
- // see https://www.tensorflow.org/api_docs/python/tf/gather_nd
- // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
- // batch_dims isn't supported in tflite
+ loco::TensorShape shape_output;
+ shape_output = own_shape(node);
- // TODO: replace exceptions with setting shape to unknown?
+ return loco::NodeShape{shape_output};
+}
- if (!indices_shape.dim(indices_rank - 1).known())
- INTERNAL_EXN("Last indices dimension is unknown");
+// Circle Only
+loco::NodeShape infer_bcq_fully_connected(const luci::CircleBCQFullyConnected *node)
+{
+ loco::TensorShape out_shape;
- auto indices_last_dim = indices_shape.dim(indices_rank - 1).value();
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
- if (indices_last_dim > params_rank)
- INTERNAL_EXN("Last indices dimension should be <= params rank");
+ LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
- const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
+ int32_t qbits_sum = 0;
+ for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i)
+ {
+ qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
+ }
- output_shape.rank(output_rank);
+ out_shape.rank(2);
+ out_shape.dim(0) = qbits_sum;
+ out_shape.dim(1) = input_shape.dim(1);
- uint32_t output_index = 0;
- for (uint32_t i = 0; i < indices_rank - 1; ++i)
- {
- auto &dim = indices_shape.dim(i);
- if (!dim.known())
- INTERNAL_EXN("Unknown indices dimension is unsupported");
- output_shape.dim(output_index++).set(dim.value());
- }
+ return loco::NodeShape{out_shape};
+}
- for (uint32_t i = indices_last_dim; i < params_rank; ++i)
- {
- auto &dim = params_shape.dim(i);
- if (!dim.known())
- INTERNAL_EXN("Unknown params dimension is unsupported");
- output_shape.dim(output_index++).set(dim.value());
- }
+loco::NodeShape infer_bcq_gather(const luci::CircleBCQGather *node)
+{
+ loco::TensorShape input_shape;
+ loco::TensorShape output_shape;
- return loco::NodeShape{output_shape};
+ const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
+ const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ auto axis = node->axis();
+
+ auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
+ auto qbits_sum = 0;
+ for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
+ {
+ qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
}
- loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
+ input_shape.rank(2);
+ input_shape.dim(0) = qbits_sum;
+ input_shape.dim(1) = input_binary_shape.dim(1).value() * 32;
- loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
+ output_shape.rank(input_shape.rank() - 1 + indices_shape.rank());
+ int32_t outdim_index = 0;
+ for (int32_t i = 0; i < axis; ++i)
+ output_shape.dim(outdim_index++) = input_shape.dim(i);
+ for (uint32_t i = 0; i < indices_shape.rank(); ++i)
+ output_shape.dim(outdim_index++) = indices_shape.dim(i);
+ for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
+ output_shape.dim(outdim_index++) = input_shape.dim(i);
- loco::NodeShape visit(const luci::CircleIf *node) final
- {
- // Shape of CircleIf is not used. Just use input 0
- assert(node->input_count() > 0);
- const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
- return loco::NodeShape{input_shape};
- }
+ return loco::NodeShape{output_shape};
+}
- loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); }
+// Virtual
+loco::NodeShape infer_input(const luci::CircleInput *node)
+{
+ loco::TensorShape shape;
- loco::NodeShape visit(const luci::CircleL2Pool2D *node) final
- {
- return infer_pool_2d_shape(node);
- }
+ shape.rank(node->rank());
+ for (uint32_t axis = 0; axis < node->rank(); axis++)
+ shape.dim(axis) = node->dim(axis);
- loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
- {
- const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
- return loco::NodeShape{input_shape};
- }
+ return loco::NodeShape{shape};
+}
- loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_output(const luci::CircleOutput *node)
+{
+ auto graph_outputs = node->graph()->outputs();
+ auto graph_output = graph_outputs->at(node->index());
+ auto output_shape = graph_output->shape();
- loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); }
+ return loco::NodeShape{*output_shape};
+}
- loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
+loco::NodeShape infer_if_out(const luci::CircleIfOut *node)
+{
+ /**
+ * @note IF operator type and shape are that of the "then" and "else"
+ * Graph Outputs.
+ */
+ auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
+ if (circle_if == nullptr)
{
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- return loco::NodeShape{input_shape};
+ INTERNAL_EXN("CircleIf IR is not configured correctly");
}
- loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); }
+ auto index = node->index();
+ auto then_graph = circle_if->then_graph();
+ auto else_graph = circle_if->else_graph();
+ assert(then_graph != nullptr);
+ assert(else_graph != nullptr);
- loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); }
+ // shape and type are assumed to be same
+ // these are checked at post_import_graph() in Import
+ auto then_outputs = loco::output_nodes(then_graph);
+ auto else_outputs = loco::output_nodes(else_graph);
+ assert(then_outputs.size() == else_outputs.size());
+ assert(index < static_cast<int32_t>(then_outputs.size()));
- loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); }
+ auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
+ auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
- loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); }
+ auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
+ auto else_graph_outputs = else_graph->outputs();
+ assert(then_graph_outputs->size() == else_graph_outputs->size());
- loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); }
+ auto then_graph_output = then_graph_outputs->at(then_out->index());
+ auto else_graph_output = else_graph_outputs->at(else_out->index());
+ (void)else_graph_output; // make compiler happy for unused variable warnings
+ assert(*then_graph_output->shape() == *else_graph_output->shape());
- loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+ return loco::NodeShape{*then_graph_output->shape()};
+}
- auto rank = diagonal_shape.rank();
+loco::NodeShape infer_non_max_suppression_v4_out(const luci::CircleNonMaxSuppressionV4Out *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
- LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1");
+ auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
+ if (nmsv4 == nullptr)
+ INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
- for (uint32_t i = 0; i < rank - 1; i++)
- {
- LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims");
- }
+ auto index = node->index();
+ if (index == 1)
+ return loco::TensorShape({0});
- auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
+ assert(index == 0);
- LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error");
+ auto unknown = loco::TensorShape{loco::Dimension()};
+ auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
+ if (max_output_size == nullptr)
+ return unknown; // we need CircleConst for max output size
- return loco::NodeShape{input_shape};
- }
+ LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
- loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); }
+ if (max_output_size->size<S32>() < 1)
+ return unknown;
- loco::NodeShape visit(const luci::CircleMatrixDiag *node) final
- {
- loco::TensorShape output_shape;
+ auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
+ return loco::TensorShape{max_output_size_value};
+}
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
- auto rank = diagonal_shape.rank();
+loco::NodeShape infer_non_max_suppression_v5_out(const luci::CircleNonMaxSuppressionV5Out *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
- output_shape.rank(rank + 1);
+ auto nmsv5 = dynamic_cast<const luci::CircleNonMaxSuppressionV5 *>(node->input());
+ if (nmsv5 == nullptr)
+ INTERNAL_EXN("CircleNonMaxSuppressionV5 IR is not configured correctly");
- for (uint32_t i = 0; i < rank; i++)
- {
- output_shape.dim(i) = diagonal_shape.dim(i);
- }
+ auto index = node->index();
+ if (index == 2)
+ return loco::TensorShape({0});
- output_shape.dim(rank) = diagonal_shape.dim(rank - 1);
+ assert(index == 0 || index == 1);
- return loco::NodeShape{output_shape};
- }
+ auto unknown = loco::TensorShape{loco::Dimension()};
+ auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv5->max_output_size());
+ if (max_output_size == nullptr)
+ return unknown; // we need CircleConst for max output size
- loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); }
+ LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
- loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
- {
- return infer_pool_2d_shape(node);
- }
+ if (max_output_size->size<S32>() < 1)
+ return unknown;
- loco::NodeShape visit(const luci::CircleMean *node) final
- {
- auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
- return loco::NodeShape{output_shape};
- }
+ auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
+ return loco::TensorShape{max_output_size_value};
+}
- loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_split_out(const luci::CircleSplitOut *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
- loco::NodeShape visit(const luci::CircleMirrorPad *node) final
- {
- const loco::DataType S32 = loco::DataType::S32;
+ auto split = dynamic_cast<const luci::CircleSplit *>(node->input());
+ if (split == nullptr)
+ INTERNAL_EXN("CircleSplit IR is not configured correctly");
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ loco::NodeShape unknown;
- // TODO support non-const case
- // TODO support other data type
- LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
- LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+ auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
- int32_t n = paddings->dim(0).value();
- int32_t v = paddings->dim(1).value();
+ auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
+ if (split_dim == nullptr)
+ return unknown; // we need CircleConst for split_dim
+ LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
- LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
- LUCI_ASSERT(n == int32_t(input_shape.rank()),
- "paddings [n, 2] should have same value of input rank");
+ assert(split_dim->size<S32>() == 1);
+ auto split_dim_axis = split_dim->at<S32>(0);
+ if (split_dim_axis < 0)
+ split_dim_axis += split_shape.rank();
- loco::TensorShape output_shape;
+ auto split_dim_value = split_shape.dim(split_dim_axis).value();
+ assert(split_dim_value % split->num_split() == 0);
+ const int split_depth = split_dim_value / split->num_split();
- output_shape.rank(input_shape.rank());
- for (int32_t ni = 0; ni < n; ++ni)
- {
- int32_t idx = ni * 2;
- int value = input_shape.dim(ni).value();
- value += paddings->at<S32>(idx + 0); // left
- value += paddings->at<S32>(idx + 1); // right
- output_shape.dim(ni) = value;
- }
+ loco::TensorShape output_shape = split_shape;
- return loco::NodeShape{output_shape};
- }
+  // All output shapes equal the input shape except along the split axis
+ output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
- loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); }
+ return loco::NodeShape{output_shape};
+}
- loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
+loco::NodeShape infer_split_v_out(const luci::CircleSplitVOut *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
- loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
- {
- const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
- return loco::NodeShape{boxes_shape};
- }
+ auto split = dynamic_cast<const luci::CircleSplitV *>(node->input());
+ if (split == nullptr)
+    INTERNAL_EXN("CircleSplitV IR is not configured correctly");
- loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
+ loco::NodeShape unknown;
- loco::NodeShape visit(const luci::CircleOneHot *node) final
- {
- const loco::DataType S32 = loco::DataType::S32;
- auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
- // Only support OneHot node's depth() is CircleConst with type S32
- // TODO support depth with other types
- auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
- LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst");
- if (depth->rank() != 0)
- INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank()));
- loco::TensorShape output_shape;
- output_shape.rank(indices_shape.rank() + 1);
- auto axis = node->axis();
- if (axis < 0)
- axis += indices_shape.rank() + 1;
- LUCI_ASSERT(0 <= axis, "Axis is out of range");
- LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range");
- uint32_t j = 0;
- for (uint32_t i = 0; i < output_shape.rank(); i++)
- {
- if (i == static_cast<uint32_t>(axis))
- {
- output_shape.dim(i) = depth->at<S32>(0);
- }
- else
- {
- output_shape.dim(i) = indices_shape.dim(j++);
- }
- }
- return loco::NodeShape{output_shape};
- }
+ auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
- loco::NodeShape visit(const luci::CirclePack *node) final
- {
- LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
+ auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
+ if (size_splits == nullptr)
+ return unknown; // we need CircleConst for size_splits
+ LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits");
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
- // Make sure all inputs have the same shape.
- for (uint32_t i = 1; i < node->values_count(); ++i)
- {
- auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
- LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
- "All inputs must have the same shape");
- }
+ auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
+ if (split_dim == nullptr)
+ return unknown; // we need CircleConst for split_dim
+ LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
- // Checking shape capability for pack layer
- // Input: tensors [D1, D2, ... Dn]
- // Axis: K
- // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
- auto axis = node->axis();
- if (axis < 0)
- axis += first_shape.rank() + 1;
+ // fetch axis
+ assert(split_dim->size<S32>() == 1);
+ auto split_dim_axis = split_dim->at<S32>(0);
+ if (split_dim_axis < 0)
+ split_dim_axis += split_shape.rank();
- LUCI_ASSERT(0 <= axis, "Axis is out of range");
- LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
+ // interpret size_splits values
+ int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>());
+ assert(size_splits_count == split->num_split());
- loco::TensorShape output_shape;
- output_shape.rank(first_shape.rank() + 1);
+ int64_t minus_one_count = 0, size_splits_sum = 0;
+ for (int32_t idx = 0; idx < size_splits_count; ++idx)
+ {
+ auto size = size_splits->at<S32>(idx);
+ assert(size >= -1);
+ if (size == -1)
+ ++minus_one_count;
+ else
+ size_splits_sum += size;
+ }
+ if (minus_one_count > 1)
+ INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values");
- uint32_t j = 0;
- for (uint32_t i = 0; i < output_shape.rank(); ++i)
- {
- if (i == static_cast<uint32_t>(axis))
- {
- output_shape.dim(i) = node->values_count();
- }
- else
- {
- output_shape.dim(i) = first_shape.dim(j++);
- }
- }
+  // calculate this SplitVOut shape
+ auto input_size = split_shape.dim(split_dim_axis).value();
+ assert(size_splits_sum <= input_size);
- return loco::NodeShape{output_shape};
- }
+ auto index_this = node->index();
+ assert(0 <= index_this && index_this < split->num_split());
+ auto split_depth = size_splits->at<S32>(index_this);
+ if (split_depth == -1)
+ split_depth = input_size - size_splits_sum;
- loco::NodeShape visit(const luci::CirclePad *node) final
- {
- const loco::DataType S32 = loco::DataType::S32;
+ loco::TensorShape output_shape = split_shape;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
- // TODO support non-const case
- // TODO support other data type
- LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
- LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+ return loco::NodeShape{output_shape};
+}
- int32_t n = paddings->dim(0).value();
- int32_t v = paddings->dim(1).value();
+loco::NodeShape infer_top_k_v2_out(const luci::CircleTopKV2Out *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
- LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
- LUCI_ASSERT(n == int32_t(input_shape.rank()),
- "paddings [n, 2] should have same value of input rank");
+ auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input());
+ if (topkv2 == nullptr)
+    INTERNAL_EXN("CircleTopKV2 IR is not configured correctly");
- loco::TensorShape output_shape;
+ // shape of topkv2 is same as topkv2->input()
+ auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
- output_shape.rank(input_shape.rank());
- for (int32_t ni = 0; ni < n; ++ni)
- {
- int32_t idx = ni * 2;
- int value = input_shape.dim(ni).value();
- value += paddings->at<S32>(idx + 0); // left
- value += paddings->at<S32>(idx + 1); // right
- output_shape.dim(ni) = value;
- }
+ auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
+ LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
+ assert(node_k->size<S32>() == 1);
- return loco::NodeShape{output_shape};
+ loco::TensorShape output_shape;
+
+ output_shape.rank(input_shape.rank());
+ for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx)
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
}
+ output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0);
- loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); }
+ return loco::NodeShape{output_shape};
+}
- loco::NodeShape visit(const luci::CirclePRelu *node) final
+loco::NodeShape infer_unique_out(const luci::CircleUniqueOut *node)
+{
+ if (node->index() == 0)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+ auto unique_shape = own_shape(node);
+ return loco::NodeShape{unique_shape};
+ }
+ assert(node->index() == 1);
+ auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
+ auto unique_shape = loco::shape_get(unique->input()).as<loco::TensorShape>();
- auto output_shape = broadcast_shape(input_shape, alpha_shape);
+ assert(unique_shape.rank() == 1);
- return loco::NodeShape{output_shape};
+ loco::TensorShape shape_output;
+ shape_output.rank(1);
+ shape_output.dim(0) = unique_shape.dim(0);
+ return loco::NodeShape{shape_output};
+}
+
+loco::NodeShape infer_unpack_out(const luci::CircleUnpackOut *node)
+{
+ auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
+ if (unpack == nullptr)
+ {
+ INTERNAL_EXN("CircleUnpack IR is not configured correctly");
}
- loco::NodeShape visit(const luci::CircleRange *node) final
+ auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
+
+ return loco::NodeShape{unpack_shape};
+}
+
+loco::NodeShape infer_while_out(const luci::CircleWhileOut *node)
+{
+ /**
+ * @note WHILE operator's shape is the same with the "cond"
+ * Graph input.
+ */
+ auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input());
+ if (circle_while == nullptr)
{
- loco::TensorShape output_shape;
- output_shape.rank(1);
+ INTERNAL_EXN("CircleWhile IR is not configured correctly");
+ }
- auto start_node = dynamic_cast<luci::CircleConst *>(node->start());
- auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit());
- auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta());
+ auto index = node->index();
+ auto cond_graph = circle_while->cond_graph();
+ assert(cond_graph != nullptr);
- if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr)
- {
- return use_own(node);
- }
+ // Assumption: the index of CircleWhileOut matches with the index of input nodes returned by
+ // loco::input_nodes
+ auto cond_inputs = loco::input_nodes(cond_graph);
+ auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
+
+ auto cond_graph_inputs = cond_graph->inputs();
+ auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
+
+ auto cond_graph_input_shape = *cond_graph_input->shape();
+ auto this_shape = own_shape(node);
+
+ if (!(this_shape == cond_graph_input_shape))
+ {
+ LOGGER(l);
+    WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mismatch " << this_shape
+            << " vs " << cond_graph_input_shape;
+ }
- double start = 0, limit = 0, delta = 0;
+ return loco::NodeShape{this_shape};
+}
-#define GET_RANGE_PARAM(DT) \
- start = start_node->scalar<DT>(); \
- limit = limit_node->scalar<DT>(); \
- delta = delta_node->scalar<DT>();
+/**
+ * @brief Class to infer the shape of CircleNode
+ *
+ * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
+ */
+class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
+{
+public:
+ loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); }
- switch (start_node->dtype())
- {
- case loco::DataType::FLOAT32:
- GET_RANGE_PARAM(loco::DataType::FLOAT32)
- break;
- case loco::DataType::S32:
- GET_RANGE_PARAM(loco::DataType::S32)
- break;
- default:
- INTERNAL_EXN("Range data type not supported");
- }
+ loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); }
-#undef GET_RANGE_PARAM
+ loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); }
- if (delta == 0)
- INTERNAL_EXN("Delta can not be zero");
+ loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); }
- output_shape.dim(0) = ceil((limit - start) / delta);
+ loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); }
- return loco::NodeShape{output_shape};
+ loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
+ {
+ return infer_pool_2d_shape(node);
}
- loco::NodeShape visit(const luci::CircleRank *) final
+ loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
{
- loco::TensorShape shape_output;
- shape_output.rank(0);
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
- return loco::NodeShape{shape_output};
+ return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
}
- loco::NodeShape visit(const luci::CircleReduceAny *node) final
+ loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
{
- auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
- return loco::NodeShape{output_shape};
+ return infer_batch_to_space_nd(node);
}
- loco::NodeShape visit(const luci::CircleReduceMax *node) final
+ loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleConcatenation *node) final
{
- auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
- return loco::NodeShape{output_shape};
+ return infer_concatenation(node);
}
- loco::NodeShape visit(const luci::CircleReduceMin *node) final
+ loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }
+
+ loco::NodeShape visit(const luci::CircleConv2D *node) final { return infer_conv2d(node); }
+
+ loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+
+ loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
{
- auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
- return loco::NodeShape{output_shape};
+ return infer_depth_to_space(node);
}
- loco::NodeShape visit(const luci::CircleReduceProd *node) final
+ loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
{
- auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
- return loco::NodeShape{output_shape};
+ return infer_depthwise_conv2d(node);
}
- loco::NodeShape visit(const luci::CircleRelu *node) final
+ loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
+
+ loco::NodeShape visit(const luci::CircleElu *node) final
{
auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
- loco::NodeShape visit(const luci::CircleRelu6 *node) final
- {
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); }
- return loco::NodeShape{input_shape};
- }
+ loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); }
- loco::NodeShape visit(const luci::CircleReluN1To1 *node) final
+ loco::NodeShape visit(const luci::CircleExpandDims *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
-
- return loco::NodeShape{input_shape};
+ return infer_expand_dims(node);
}
- /**
- * @note CircleReshape has new shape info in two places: 2nd input and attribute.
- * This shape inference uses shape from input 'shape' node when it's constant.
- * If not, shape will be from node itself. shape from attribute is not used.
- *
- * TODO Change this policy when not appropriate
- */
- loco::NodeShape visit(const luci::CircleReshape *node) final
- {
- LOGGER(l);
+ loco::NodeShape visit(const luci::CircleFill *node) final { return infer_fill(node); }
- const loco::DataType S32 = loco::DataType::S32;
+ loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
- loco::TensorShape shape_by_input;
- {
- LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
+ loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); }
- // Only support node's shape() is CircleConst with S32
- // TODO support other node with other types
- auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
- if (const_shape_node != nullptr)
- {
- LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
+ loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); }
- shape_by_input.rank(const_shape_node->size<S32>());
+ loco::NodeShape visit(const luci::CircleFullyConnected *node) final
+ {
+ return infer_fully_connected(node);
+ }
- for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
- {
- shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
- }
- }
- else
- {
- // We use shape from the node itself
- shape_by_input = own_shape(node);
- }
- }
+ loco::NodeShape visit(const luci::CircleGather *node) final { return infer_gather(node); }
- loco::TensorShape shape_by_attr;
- {
- shape_by_attr.rank(node->newShape()->rank());
+ loco::NodeShape visit(const luci::CircleGatherNd *node) final { return infer_gather_nd(node); }
- for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
- {
- shape_by_attr.dim(axis) = node->newShape()->dim(axis);
- }
- }
+ loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
- if (!(shape_by_input == shape_by_attr))
- {
- INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl;
- INFO(l) << " shape_by_input : " << shape_by_input << std::endl;
- INFO(l) << " shape_by_attr : " << shape_by_attr << std::endl;
- }
+ loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
- loco::TensorShape output_shape = shape_by_input;
+ loco::NodeShape visit(const luci::CircleIf *node) final
+ {
+ // Shape of CircleIf is not used. Just use input 0
+ assert(node->input_count() > 0);
+ const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
- // One of the dimensions can have special value -1, meaning its actual value should be inferred.
- const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
- const uint32_t input_element_count = loco::element_count(&input_shape);
- uint32_t output_element_count = 1;
- uint32_t unknown_dim_index = UINT32_MAX;
- for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
- {
- const uint32_t dim_value = output_shape.dim(dim_index).value();
- if (static_cast<int>(dim_value) == -1)
- {
- LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
- unknown_dim_index = dim_index;
- }
- else
- {
- output_element_count *= dim_value;
- }
- }
- if (unknown_dim_index != UINT32_MAX)
- {
- output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
- }
+ loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); }
- return loco::NodeShape{output_shape};
+ loco::NodeShape visit(const luci::CircleL2Pool2D *node) final
+ {
+ return infer_pool_2d_shape(node);
}
- loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
+ loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
-
- auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
-
- if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
-
- if (const_node->rank() != 1)
- INTERNAL_EXN("Expected size tensor of rank 1");
-
- if (const_node->dim(0).value() != 2)
- INTERNAL_EXN("Expected size tensor with shape [2]");
+ const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
- loco::TensorShape output_shape;
- output_shape.rank(4);
- output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
- output_shape.dim(3) = input_shape.dim(3);
+ loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); }
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); }
- loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
+ loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
- if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
+ loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); }
- auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
+ loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); }
- if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
+ loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); }
- if (const_node->rank() != 1)
- INTERNAL_EXN("Expected size tensor of rank 1");
+ loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); }
- if (const_node->dim(0).value() != 2)
- INTERNAL_EXN("Expected size tensor with shape [2]");
+ loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); }
- loco::TensorShape output_shape;
- output_shape.rank(4);
- output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
- output_shape.dim(3) = input_shape.dim(3);
+ loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); }
- return loco::NodeShape{output_shape};
+ loco::NodeShape visit(const luci::CircleMatrixDiag *node) final
+ {
+ return infer_matrix_diag(node);
}
- loco::NodeShape visit(const luci::CircleReverseSequence *node) final
+ loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- return loco::NodeShape{input_shape};
+ return infer_matrix_set_diag(node);
}
- loco::NodeShape visit(const luci::CircleRound *node) final { return use_x(node); }
+ loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); }
- loco::NodeShape visit(const luci::CircleReverseV2 *node) final
+ loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
{
- auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
-
- LUCI_ASSERT(loco::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
- "Tensor must be 1-D");
-
- return loco::NodeShape{input_shape};
+ return infer_pool_2d_shape(node);
}
- loco::NodeShape visit(const luci::CircleRsqrt *node) final { return use_x(node); }
-
- loco::NodeShape visit(const luci::CircleScatterNd *node) final
+ loco::NodeShape visit(const luci::CircleMean *node) final
{
- loco::TensorShape output_shape;
-
- auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape());
+ auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
+ return loco::NodeShape{output_shape};
+ }
- const loco::DataType S32 = loco::DataType::S32;
- const loco::DataType S64 = loco::DataType::S64;
+ loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); }
- std::vector<int64_t> vect_shape;
+ loco::NodeShape visit(const luci::CircleMirrorPad *node) final { return infer_mirror_pad(node); }
- if (shape_node->dtype() == S32)
- vect_shape = vector_from_constant<S32>(shape_node);
- else if (shape_node->dtype() == S64)
- vect_shape = vector_from_constant<S64>(shape_node);
- else
- LUCI_ASSERT(false, "Only support int32/int64 for shape()");
+ loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); }
- output_shape.rank(vect_shape.size());
- for (uint32_t i = 0; i < vect_shape.size(); ++i)
- output_shape.dim(i) = vect_shape[i];
+ loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
- return loco::NodeShape{output_shape};
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ return loco::NodeShape{boxes_shape};
}
- loco::NodeShape visit(const luci::CircleSegmentSum *node) final
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5 *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
+ const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ return loco::NodeShape{boxes_shape};
+ }
- LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
- LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
- "segment_ids size must be equal to the size of data's first dimension");
+ loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
- auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids());
+ loco::NodeShape visit(const luci::CircleOneHot *node) final { return infer_one_hot(node); }
- std::vector<int64_t> vect_ids;
+ loco::NodeShape visit(const luci::CirclePack *node) final { return infer_pack(node); }
- if (ids_shape_value->dtype() == loco::DataType::S32)
- vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);
+ loco::NodeShape visit(const luci::CirclePad *node) final { return infer_pad(node); }
- LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
- "segment_ids values should be sorted")
+ loco::NodeShape visit(const luci::CirclePadV2 *node) final { return infer_pad_v2(node); }
- loco::TensorShape output_shape;
+ loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); }
- output_shape.rank(input_shape.rank());
+ loco::NodeShape visit(const luci::CirclePRelu *node) final { return infer_p_relu(node); }
- for (uint32_t i = 1; i < input_shape.rank(); ++i)
- output_shape.dim(i) = input_shape.dim(i);
+ loco::NodeShape visit(const luci::CircleRange *node) final { return infer_range(node); }
- output_shape.dim(0) = vect_ids.back() + 1;
+ loco::NodeShape visit(const luci::CircleRank *) final
+ {
+ loco::TensorShape shape_output;
+ shape_output.rank(0);
- return loco::NodeShape{output_shape};
+ return loco::NodeShape{shape_output};
}
- loco::NodeShape visit(const luci::CircleSelect *node) final
+ loco::NodeShape visit(const luci::CircleReduceAny *node) final
{
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
-
- // condition shape validation
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
- if (c_shape.rank() != t_shape.rank())
- {
- if (c_shape.rank() != 0 && c_shape.rank() != 1)
- INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());
-
- if (c_shape.rank() == 1)
- {
- if (c_shape.dim(0).value() != t_shape.dim(0).value())
- INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)");
- }
- }
-
- return loco::NodeShape{t_shape};
+ auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
+ return loco::NodeShape{output_shape};
}
- loco::NodeShape visit(const luci::CircleSelectV2 *node) final
+ loco::NodeShape visit(const luci::CircleReduceMax *node) final
{
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
-
- // validate ability to broadcast shapes to each other
- auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
- return loco::NodeShape{b_shape};
+ auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
+ return loco::NodeShape{output_shape};
}
- loco::NodeShape visit(const luci::CircleShape *node) final
+ loco::NodeShape visit(const luci::CircleReduceMin *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- loco::TensorShape output_shape;
-
- output_shape.rank(1);
- output_shape.dim(0) = input_shape.rank();
-
+ auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
return loco::NodeShape{output_shape};
}
- loco::NodeShape visit(const luci::CircleSin *node) final { return use_x(node); }
-
- loco::NodeShape visit(const luci::CircleSlice *node) final
+ loco::NodeShape visit(const luci::CircleReduceProd *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
- const loco::DataType S64 = loco::DataType::S64;
+ auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
+ return loco::NodeShape{output_shape};
+ }
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ loco::NodeShape visit(const luci::CircleRelu *node) final
+ {
+ auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
- auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
- auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
+ return loco::NodeShape{input_shape};
+ }
- loco::TensorShape output_shape;
- std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t
- std::vector<int64_t> vect_size;
+ loco::NodeShape visit(const luci::CircleRelu6 *node) final
+ {
+ auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
- if (const_begin->dtype() == S32)
- vect_begin = vector_from_constant<S32>(const_begin);
- else if (const_begin->dtype() == S64)
- vect_begin = vector_from_constant<S64>(const_begin);
- else
- LUCI_ASSERT(false, "Only support int32/int64 for begin()");
+ return loco::NodeShape{input_shape};
+ }
- if (const_size->dtype() == S32)
- vect_size = vector_from_constant<S32>(const_size);
- else if (const_size->dtype() == S64)
- vect_size = vector_from_constant<S64>(const_size);
- else
- LUCI_ASSERT(false, "Only support int32/int64 for size()");
+ loco::NodeShape visit(const luci::CircleReluN1To1 *node) final
+ {
+ auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
- assert(input_shape.rank() == vect_begin.size());
- assert(input_shape.rank() == vect_size.size());
+ return loco::NodeShape{input_shape};
+ }
- output_shape.rank(vect_begin.size());
- for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
- {
- auto size = vect_size.at(idx);
- if (size == -1)
- {
- size = input_shape.dim(idx).value() - vect_begin.at(idx);
- }
- output_shape.dim(idx) = size;
- }
+ /**
+ * @note CircleReshape has new shape info in two places: 2nd input and attribute.
+ * This shape inference uses shape from input 'shape' node when it's constant.
+ * If not, shape will be from node itself. shape from attribute is not used.
+ *
+ * TODO Change this policy when not appropriate
+ */
+ loco::NodeShape visit(const luci::CircleReshape *node) final { return infer_reshape(node); }
- return loco::NodeShape{output_shape};
+ loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
+ {
+ return infer_resize_bilinear(node);
}
- loco::NodeShape visit(const luci::CircleSoftmax *node) final { return use_logits(node); }
-
- loco::NodeShape visit(const luci::CircleSpaceToBatchND *node) final
+ loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
+ return infer_resize_nearest_neighbor(node);
+ }
+ loco::NodeShape visit(const luci::CircleReverseSequence *node) final
+ {
auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- // Support only input rank is 3 and 4
- assert(input_shape.rank() == 3 || input_shape.rank() == 4);
-
- // Only support block_shape() with S32 type CircleConst for now
- auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
- LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape");
-
- // Only support paddings() with S32 type CircleConst for now
- auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
- LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
-
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
- assert(const_block_shape_shape.rank() == 1);
- assert(const_paddings_shape.rank() == 2);
-
- int32_t input_spatial_dim = input_shape.rank() - 2;
- assert(const_block_shape_shape.dim(0) == input_spatial_dim);
- assert(const_paddings_shape.dim(0) == input_spatial_dim);
- assert(const_paddings_shape.dim(1) == 2);
-
- // Check all values of block_shape >= 1
- uint32_t ele_count = const_block_shape->size<S32>();
- for (uint32_t e = 0; e < ele_count; ++e)
- {
- auto val = const_block_shape->at<S32>(e);
- if (val < 1)
- {
- INTERNAL_EXN_V("All values of block_shape >= 1: ", e);
- }
- }
- loco::TensorShape shape_output;
+ return loco::NodeShape{input_shape};
+ }
- shape_output.rank(input_shape.rank());
+ loco::NodeShape visit(const luci::CircleRound *node) final { return use_x(node); }
- int32_t output_batch_size = input_shape.dim(0).value();
- for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
- {
- int dim_size = input_shape.dim(dim + 1).value();
- dim_size += const_paddings->at<S32>(dim * 2);
- dim_size += const_paddings->at<S32>(dim * 2 + 1);
- shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim);
+ loco::NodeShape visit(const luci::CircleReverseV2 *node) final
+ {
+ auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
- assert(dim_size % const_block_shape->at<S32>(dim) == 0);
- output_batch_size = output_batch_size * const_block_shape->at<S32>(dim);
- }
- shape_output.dim(0) = output_batch_size;
- shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+ LUCI_ASSERT(loco::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
+ "Tensor must be 1-D");
- return loco::NodeShape{shape_output};
+ return loco::NodeShape{input_shape};
}
- loco::NodeShape visit(const luci::CircleSpaceToDepth *node) final
+ loco::NodeShape visit(const luci::CircleRsqrt *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleScatterNd *node) final { return infer_scatter_nd(node); }
+
+ loco::NodeShape visit(const luci::CircleSegmentSum *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+ return infer_segment_sum(node);
+ }
- // Only data format NHWC is supported
- int32_t height = input_shape.dim(1).value();
- int32_t width = input_shape.dim(2).value();
- int32_t depth = input_shape.dim(3).value();
+ loco::NodeShape visit(const luci::CircleSelect *node) final { return infer_select(node); }
- int block_size = node->block_size();
+ loco::NodeShape visit(const luci::CircleSelectV2 *node) final { return infer_select_v2(node); }
- if (block_size < 2)
- INTERNAL_EXN("Block size must be >= 2");
+ loco::NodeShape visit(const luci::CircleShape *node) final { return infer_shape(node); }
- if ((height % block_size) || (width % block_size))
- {
- INTERNAL_EXN("The input tensor's height and width must be divisible by block_size");
- }
+ loco::NodeShape visit(const luci::CircleSin *node) final { return use_x(node); }
- loco::TensorShape output_shape;
- output_shape.rank(4);
+ loco::NodeShape visit(const luci::CircleSlice *node) final { return infer_slice(node); }
- output_shape.dim(0) = input_shape.dim(0).value();
- output_shape.dim(1) = height / block_size;
- output_shape.dim(2) = width / block_size;
- output_shape.dim(3) = block_size * block_size * depth;
+ loco::NodeShape visit(const luci::CircleSoftmax *node) final { return use_logits(node); }
- return loco::NodeShape{output_shape};
+ loco::NodeShape visit(const luci::CircleSpaceToBatchND *node) final
+ {
+ return infer_space_to_batch_nd(node);
}
- loco::NodeShape visit(const luci::CircleSparseToDense *node) final
+ loco::NodeShape visit(const luci::CircleSpaceToDepth *node) final
{
- loco::TensorShape shape;
- {
- LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr");
-
- auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
- if (output_shape_node != nullptr)
- {
- // Only support node with S32
- LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst");
-
- if (output_shape_node->rank() != 1)
- INTERNAL_EXN_V("Only support rank 1 CircleConst",
- oops::to_uint32(output_shape_node->rank()));
-
- shape.rank(output_shape_node->dim(0).value());
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
- }
- }
- else
- {
- shape = own_shape(node);
- }
- }
+ return infer_space_to_depth(node);
+ }
- return loco::NodeShape{shape};
+ loco::NodeShape visit(const luci::CircleSparseToDense *node) final
+ {
+ return infer_sparse_to_dense(node);
}
loco::NodeShape visit(const luci::CircleSplit *node) final
loco::NodeShape visit(const luci::CircleStridedSlice *node) final
{
- auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin());
- auto end_node = dynamic_cast<luci::CircleConst *>(node->end());
- auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides());
-
- if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr)
- {
- return use_own(node);
- }
-
- loco::TensorShape shape = infer_output_shape(node);
- return loco::NodeShape{shape};
+ return infer_strided_slice(node);
}
- loco::NodeShape visit(const luci::CircleSqueeze *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- // TODO input shape may be unknown before runtime
- std::vector<bool> do_squeeze(input_shape.rank(), false);
- uint32_t num_squeezed = 0;
-
- if (!node->squeeze_dims().empty())
- {
- // SqueezeDims not empty, squeeze only dims specified
- for (int32_t raw_dim : node->squeeze_dims())
- {
- int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
-
- if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() ||
- input_shape.dim(dim).value() != 1)
- {
- INTERNAL_EXN("invalid dimention specified to Squeeze");
- }
-
- if (!do_squeeze[dim])
- ++num_squeezed;
- do_squeeze[dim] = true;
- }
- }
- else
- {
- // SqueezeDims empty, squeeze any dims with size == 1
- for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
- {
- if (input_shape.dim(dim) == 1)
- {
- do_squeeze[dim] = true;
- ++num_squeezed;
- }
- }
- }
-
- loco::TensorShape output_shape;
- output_shape.rank(input_shape.rank() - num_squeezed);
-
- for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
- {
- if (!do_squeeze[in_dim])
- {
- output_shape.dim(out_dim++) = input_shape.dim(in_dim);
- }
- }
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleSqueeze *node) final { return infer_squeeze(node); }
loco::NodeShape visit(const luci::CircleSub *node) final { return broadcast_xy(node); }
loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); }
- loco::NodeShape visit(const luci::CircleTile *node) final
- {
- const loco::DataType S32 = loco::DataType::S32;
-
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
-
- // TODO support non-const case
- // TODO support S64 type
- LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples");
- LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1")
-
- uint32_t n = multiples->dim(0).value();
-
- LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank");
-
- loco::TensorShape output_shape;
-
- output_shape.rank(input_shape.rank());
- for (uint32_t ni = 0; ni < n; ++ni)
- {
- int32_t multiple = multiples->at<S32>(ni);
- output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple);
- }
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleTile *node) final { return infer_tile(node); }
loco::NodeShape visit(const luci::CircleTopKV2 *node) final
{
return loco::NodeShape{input_shape};
}
- loco::NodeShape visit(const luci::CircleTranspose *node) final
- {
- auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
-
- auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
-
- loco::TensorShape output_shape;
- output_shape.rank(input_shape.rank());
-
- assert(perm_node->dtype() == loco::DataType::S32);
- assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
-
- for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
- {
- auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
- output_shape.dim(out_axis) = input_shape.dim(in_axis);
- }
-
- return output_shape;
- }
-
- loco::NodeShape visit(const luci::CircleUnique *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- assert(input_shape.rank() == 1);
-
- loco::TensorShape shape_output;
- shape_output = own_shape(node);
-
- return loco::NodeShape{shape_output};
- }
+ loco::NodeShape visit(const luci::CircleTranspose *node) final { return infer_transpose(node); }
loco::NodeShape visit(const luci::CircleTransposeConv *node) final
{
- // TransposeConv's output shape is written in its 'inputSizes' argument
- auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes());
- // TODO support non-const type
- LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
- LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
- "Only support rank 1 with 4 entries")
-
- loco::TensorShape shape;
-
- shape.rank(4);
- for (uint32_t axis = 0; axis < 4; ++axis)
- shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
-
- return loco::NodeShape{shape};
+ return infer_transpose_conv(node);
}
- loco::NodeShape visit(const luci::CircleUnpack *node) final
- {
- // CircleUnpack provides list(array) of Tensors which has one less dimension of the input
- // We'll set shape of CircleUnpack to shape of actual outputs
- // TODO fix this if any problem rises
- auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
-
- auto axis = node->axis();
- auto num = node->num();
- auto rank = static_cast<int32_t>(value_shape.rank());
-
- if (rank == 0)
- {
- // Unknown shape
- return use_own(node);
- }
-
- LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range");
-
- if (axis < 0)
- axis += rank;
-
- LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()),
- "num, axis maybe incorrect");
-
- loco::TensorShape output_shape;
- output_shape.rank(rank - 1);
-
- for (int32_t i = 0, o = 0; i < rank; ++i)
- {
- if (i != axis)
- output_shape.dim(o++) = value_shape.dim(i);
- }
+ loco::NodeShape visit(const luci::CircleUnpack *node) final { return infer_unpack(node); }
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleUnique *node) final { return infer_unique(node); }
loco::NodeShape visit(const luci::CircleWhere *node) final { return use_own(node); }
// Circle Only
loco::NodeShape visit(const luci::CircleBCQFullyConnected *node) final
{
- loco::TensorShape out_shape;
-
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
-
- LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
-
- int32_t qbits_sum = 0;
- for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i)
- {
- qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
- }
-
- out_shape.rank(2);
- out_shape.dim(0) = qbits_sum;
- out_shape.dim(1) = input_shape.dim(1);
-
- return loco::NodeShape{out_shape};
+ return infer_bcq_fully_connected(node);
}
- loco::NodeShape visit(const luci::CircleBCQGather *node) final
- {
- loco::TensorShape input_shape;
- loco::TensorShape output_shape;
-
- const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
- auto axis = node->axis();
-
- auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
- auto qbits_sum = 0;
- for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
- {
- qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
- }
-
- input_shape.rank(2);
- input_shape.dim(0) = qbits_sum;
- input_shape.dim(1) = input_binary_shape.dim(1).value() * 32;
-
- output_shape.rank(input_shape.rank() - 1 + indices_shape.rank());
- int32_t outdim_index = 0;
- for (int32_t i = 0; i < axis; ++i)
- output_shape.dim(outdim_index++) = input_shape.dim(i);
- for (uint32_t i = 0; i < indices_shape.rank(); ++i)
- output_shape.dim(outdim_index++) = indices_shape.dim(i);
- for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
- output_shape.dim(outdim_index++) = input_shape.dim(i);
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleBCQGather *node) final { return infer_bcq_gather(node); }
loco::NodeShape visit(const luci::CircleInstanceNorm *node) final
{
}
// Virtual
- loco::NodeShape visit(const luci::CircleInput *node) final
- {
- loco::TensorShape shape;
-
- shape.rank(node->rank());
- for (uint32_t axis = 0; axis < node->rank(); axis++)
- shape.dim(axis) = node->dim(axis);
-
- return loco::NodeShape{shape};
- }
+ loco::NodeShape visit(const luci::CircleInput *node) final { return infer_input(node); }
- loco::NodeShape visit(const luci::CircleOutput *node) final
- {
- auto graph_outputs = node->graph()->outputs();
- auto graph_output = graph_outputs->at(node->index());
- auto output_shape = graph_output->shape();
-
- return loco::NodeShape{*output_shape};
- }
+ loco::NodeShape visit(const luci::CircleOutput *node) final { return infer_output(node); }
loco::NodeShape visit(const luci::CircleOutputDummy *node) final { return use_own(node); }
loco::NodeShape visit(const luci::CircleCustomOut *node) final { return use_own(node); }
- loco::NodeShape visit(const luci::CircleIfOut *node) final
- {
- /**
- * @note IF operator type and shape are that of the "then" and "else"
- * Graph Outputs.
- */
- auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
- if (circle_if == nullptr)
- {
- INTERNAL_EXN("CircleIf IR is not configured correctly");
- }
-
- auto index = node->index();
- auto then_graph = circle_if->then_graph();
- auto else_graph = circle_if->else_graph();
- assert(then_graph != nullptr);
- assert(else_graph != nullptr);
-
- // shape and type are assumed to be same
- // these are checked at post_import_graph() in Import
- auto then_outputs = loco::output_nodes(then_graph);
- auto else_outputs = loco::output_nodes(else_graph);
- assert(then_outputs.size() == else_outputs.size());
- assert(index < static_cast<int32_t>(then_outputs.size()));
-
- auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
- auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
- auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
- auto else_graph_outputs = else_graph->outputs();
- assert(then_graph_outputs->size() == else_graph_outputs->size());
-
- auto then_graph_output = then_graph_outputs->at(then_out->index());
- auto else_graph_output = else_graph_outputs->at(else_out->index());
- (void)else_graph_output; // make compiler happy for unused variable warnings
- assert(*then_graph_output->shape() == *else_graph_output->shape());
-
- return loco::NodeShape{*then_graph_output->shape()};
- }
+ loco::NodeShape visit(const luci::CircleIfOut *node) final { return infer_if_out(node); }
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
-
- auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
- if (nmsv4 == nullptr)
- INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
-
- auto index = node->index();
- if (index == 1)
- return loco::TensorShape({0});
-
- assert(index == 0);
-
- auto unknown = loco::TensorShape{loco::Dimension()};
- auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
- if (max_output_size == nullptr)
- return unknown; // we need CircleConst for max output size
-
- LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
-
- if (max_output_size->size<S32>() < 1)
- return unknown;
-
- auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
- return loco::TensorShape{max_output_size_value};
+ return infer_non_max_suppression_v4_out(node);
}
- loco::NodeShape visit(const luci::CircleSplitOut *node) final
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
-
- auto split = dynamic_cast<const luci::CircleSplit *>(node->input());
- if (split == nullptr)
- INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
- loco::NodeShape unknown;
-
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
-
- auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
- if (split_dim == nullptr)
- return unknown; // we need CircleConst for split_dim
- LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
-
- assert(split_dim->size<S32>() == 1);
- auto split_dim_axis = split_dim->at<S32>(0);
- if (split_dim_axis < 0)
- split_dim_axis += split_shape.rank();
-
- auto split_dim_value = split_shape.dim(split_dim_axis).value();
- assert(split_dim_value % split->num_split() == 0);
- const int split_depth = split_dim_value / split->num_split();
-
- loco::TensorShape output_shape = split_shape;
-
- // All shapes are equally same
- output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
-
- return loco::NodeShape{output_shape};
+ return infer_non_max_suppression_v5_out(node);
}
- loco::NodeShape visit(const luci::CircleSplitVOut *node) final
- {
- const loco::DataType S32 = loco::DataType::S32;
-
- auto split = dynamic_cast<const luci::CircleSplitV *>(node->input());
- if (split == nullptr)
- INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
- loco::NodeShape unknown;
-
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
-
- auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
- if (size_splits == nullptr)
- return unknown; // we need CircleConst for size_splits
- LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits");
-
- auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
- if (split_dim == nullptr)
- return unknown; // we need CircleConst for split_dim
- LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
+ loco::NodeShape visit(const luci::CircleSplitOut *node) final { return infer_split_out(node); }
- // fetch axis
- assert(split_dim->size<S32>() == 1);
- auto split_dim_axis = split_dim->at<S32>(0);
- if (split_dim_axis < 0)
- split_dim_axis += split_shape.rank();
-
- // interpret size_splits values
- int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>());
- assert(size_splits_count == split->num_split());
-
- int64_t minus_one_count = 0, size_splits_sum = 0;
- for (int32_t idx = 0; idx < size_splits_count; ++idx)
- {
- auto size = size_splits->at<S32>(idx);
- assert(size >= -1);
- if (size == -1)
- ++minus_one_count;
- else
- size_splits_sum += size;
- }
- if (minus_one_count > 1)
- INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values");
-
- // calcuate this SplitVOut shape
- auto input_size = split_shape.dim(split_dim_axis).value();
- assert(size_splits_sum <= input_size);
-
- auto index_this = node->index();
- assert(0 <= index_this && index_this < split->num_split());
- auto split_depth = size_splits->at<S32>(index_this);
- if (split_depth == -1)
- split_depth = input_size - size_splits_sum;
-
- loco::TensorShape output_shape = split_shape;
-
- output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleSplitVOut *node) final { return infer_split_v_out(node); }
loco::NodeShape visit(const luci::CircleTopKV2Out *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
-
- auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input());
- if (topkv2 == nullptr)
- INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
- // shape of topkv2 is same as topkv2->input()
- auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
-
- auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
- LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
- assert(node_k->size<S32>() == 1);
-
- loco::TensorShape output_shape;
-
- output_shape.rank(input_shape.rank());
- for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx)
- {
- output_shape.dim(idx) = input_shape.dim(idx);
- }
- output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0);
-
- return loco::NodeShape{output_shape};
- }
-
- loco::NodeShape visit(const luci::CircleUniqueOut *node) final
- {
- auto unique = dynamic_cast<const luci::CircleUnique *>(node->input());
- if (unique == nullptr)
- {
- INTERNAL_EXN("CircleUnique IR is not configured correctly");
- }
-
- auto unique_shape = loco::shape_get(unique).as<loco::TensorShape>();
-
- return loco::NodeShape{unique_shape};
- }
-
- loco::NodeShape visit(const luci::CircleUnpackOut *node) final
- {
- auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
- if (unpack == nullptr)
- {
- INTERNAL_EXN("CircleUnpack IR is not configured correctly");
- }
-
- auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
-
- return loco::NodeShape{unpack_shape};
+ return infer_top_k_v2_out(node);
}
- loco::NodeShape visit(const luci::CircleWhileOut *node) final
- {
- /**
- * @note WHILE operator's shape is the same with the "cond"
- * Graph input.
- */
- auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input());
- if (circle_while == nullptr)
- {
- INTERNAL_EXN("CircleWhile IR is not configured correctly");
- }
-
- auto index = node->index();
- auto cond_graph = circle_while->cond_graph();
- assert(cond_graph != nullptr);
-
- // Assumption: the index of CircleWhileOut matches with the index of input nodes returned by
- // loco::input_nodes
- auto cond_inputs = loco::input_nodes(cond_graph);
- auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
-
- auto cond_graph_inputs = cond_graph->inputs();
- auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
+ loco::NodeShape visit(const luci::CircleUniqueOut *node) final { return infer_unique_out(node); }
- auto cond_graph_input_shape = *cond_graph_input->shape();
- auto this_shape = own_shape(node);
+ loco::NodeShape visit(const luci::CircleUnpackOut *node) final { return infer_unpack_out(node); }
- if (!(this_shape == cond_graph_input_shape))
- {
- LOGGER(l);
- WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mispatch " << this_shape
- << " vs " << cond_graph_input_shape;
- }
-
- return loco::NodeShape{this_shape};
- }
+ loco::NodeShape visit(const luci::CircleWhileOut *node) final { return infer_while_out(node); }
};
} // namespace
return loco::dtype_get(node->boxes());
}
+ loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final
+ {
+ return loco::dtype_get(node->boxes());
+ }
+
loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; }
loco::DataType visit(const luci::CirclePack *node) final
loco::DataType visit(const luci::CirclePad *node) final { return loco::dtype_get(node->input()); }
+ loco::DataType visit(const luci::CirclePadV2 *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
+
loco::DataType visit(const luci::CirclePow *node) final
{
// TODO make sure types cannot differ
return loco::DataType::S32;
}
+ loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final
+ {
+ (void)node;
+ if (node->index() == 0 || node->index() == 2)
+ {
+ return loco::DataType::S32;
+ }
+ assert(node->index() == 1);
+ return loco::DataType::FLOAT32;
+ }
+
loco::DataType visit(const luci::CircleSplitOut *node) final
{
return loco::dtype_get(node->input());
addread(Mul_000)
addread(Mul_U8_000)
addread(Neg_000)
+addread(NonMaxSuppressionV4_000)
+addread(NonMaxSuppressionV4_001)
+addread(NonMaxSuppressionV5_000)
+addread(NonMaxSuppressionV5_001)
addread(NotEqual_000)
addread(OneHot_000)
addread(OneHot_001)
addread(Pack_U8_000)
addread(Pad_000)
addread(Pad_U8_000)
+addread(PadV2_000)
addread(Pow_000)
addread(PRelu_000)
addread(Range_000)
addread(Reshape_003)
addread(Reshape_U8_000)
addread(ResizeBilinear_000)
+addread(ResizeBilinear_U8_000)
addread(ResizeNearestNeighbor_000)
addread(ReverseSequence_000)
addread(ReverseV2_000)
addread(SpaceToBatchND_002)
addread(SpaceToBatchND_003)
addread(SpaceToDepth_000)
+addread(SpaceToDepth_U8_000)
addread(SparseToDense_000)
addread(Split_000)
addread(SplitV_000)
addread(Sum_000)
addread(Sum_001)
addread(Tanh_000)
+addread(Tanh_U8_000)
addread(Tile_000)
addread(Tile_U8_000)
addread(TopKV2_000)
addread(TopKV2_001)
addread(Transpose_000)
addread(TransposeConv_000)
+addread(Unique_000)
+addread(Unique_001)
+addread(Unique_002)
+addread(Unique_003)
+addread(Unique_U8_000)
+addread(Unique_U8_001)
addread(Unpack_000)
addread(Unpack_001)
addread(Unpack_002)
addwrite(Mul_000)
addwrite(Mul_U8_000)
addwrite(Neg_000)
+addwrite(NonMaxSuppressionV4_000)
+addwrite(NonMaxSuppressionV4_001)
+addwrite(NonMaxSuppressionV5_000)
+addwrite(NonMaxSuppressionV5_001)
addwrite(NotEqual_000)
addwrite(OneHot_000)
addwrite(OneHot_001)
addwrite(Pack_000)
addwrite(Pack_U8_000)
addwrite(Pad_000)
+addwrite(PadV2_000)
addwrite(Pow_000)
addwrite(PRelu_000)
addwrite(Range_000)
addwrite(Reshape_003)
addwrite(Reshape_U8_000)
addwrite(ResizeBilinear_000)
+addwrite(ResizeBilinear_U8_000)
addwrite(ResizeNearestNeighbor_000)
addwrite(ReverseSequence_000)
addwrite(ReverseV2_000)
addwrite(SpaceToBatchND_002)
addwrite(SpaceToBatchND_003)
addwrite(SpaceToDepth_000)
+addwrite(SpaceToDepth_U8_000)
addwrite(SparseToDense_000)
addwrite(Split_000)
addwrite(SplitV_000)
addwrite(Sum_000)
addwrite(Sum_001)
addwrite(Tanh_000)
+addwrite(Tanh_U8_000)
addwrite(Tile_000)
addwrite(Tile_U8_000)
addwrite(TopKV2_000)
addwrite(TopKV2_001)
addwrite(Transpose_000)
addwrite(TransposeConv_000)
+addwrite(Unique_000)
+addwrite(Unique_001)
+addwrite(Unique_002)
+addwrite(Unique_003)
+addwrite(Unique_U8_000)
+addwrite(Unique_U8_001)
addwrite(Unpack_000)
addwrite(Unpack_001)
addwrite(Unpack_002)
esac
done
+if [ -n ${INPUT_SHAPES} ] && [ ${TF_INTERFACE} = "--v2" ]; then
+ echo "Warning: if --v2 option is used, shape will be ignored"
+fi
+
if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
echo "Error: input model not found"
echo ""
trap show_err_onexit ERR
# generate temporary tflite file
-echo "python" "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---output_arrays ${OUTPUT_ARRAYS} > "${OUTPUT_PATH}.log"
-echo " " >> "${OUTPUT_PATH}.log"
+CONVERT_SCRIPT="python ${DRIVER_PATH}/tf2tfliteV2.py ${TF_INTERFACE} "
+CONVERT_SCRIPT+="--input_path ${INPUT_PATH} "
+CONVERT_SCRIPT+="--input_arrays ${INPUT_ARRAYS} "
+CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
+CONVERT_SCRIPT+="--output_arrays ${OUTPUT_ARRAYS} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
+fi
-python "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---output_arrays ${OUTPUT_ARRAYS} >> "${OUTPUT_PATH}.log" 2>&1
+echo ${CONVERT_SCRIPT} > "${OUTPUT_PATH}.log"
+echo "" >> "${OUTPUT_PATH}.log"
+$CONVERT_SCRIPT >> "${OUTPUT_PATH}.log" 2>&1
# convert .tflite to .circle
echo " " >> "${OUTPUT_PATH}.log"
# Install tensorflow
source "${VENV_ACTIVATE}"
+# TODO remove version number of 'pip==20.2.1 setuptools==49.3.0'
+# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install -U pip setuptools
+ install -U pip==20.2.1 setuptools==49.3.0
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
install tensorflow-cpu==2.3.0
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 1.0,
+ 2.0
+ ],
+ [
+ -3.0,
+ -4.0
+ ]
+ ],
+ [
+ [
+ -5.0,
+ 6.0
+ ],
+ [
+ -7.0,
+ 8.0
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 4.0,
+ -2.0
+ ],
+ [
+ 3.0,
+ -1.0
+ ]
+ ],
+ [
+ [
+ -8.0,
+ -6.0
+ ],
+ [
+ 7.0,
+ 5.0
+ ]
+ ]
+ ]
+ ]
+}
--- /dev/null
+{
+ "weights": [
+ 4374,
+ 8747
+ ],
+ "scale": [
+ 0.0002286423499283808,
+ 0.0002286423499283808
+ ]
+}
--- /dev/null
+{
+ "scale": 0.0038869199343025684,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153
+ ],
+ [
+ 68,
+ 51
+ ]
+ ],
+ [
+ [
+ 34,
+ 221
+ ],
+ [
+ 0,
+ 255
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 204,
+ 102
+ ],
+ [
+ 187,
+ 119
+ ]
+ ],
+ [
+ [
+ 0,
+ 34
+ ],
+ [
+ 255,
+ 221
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.058823529411764705,
+ 0.058823529411764705
+ ],
+ "zero_point": [
+ 119.0,
+ 136.0
+ ],
+ "min": [
+ -7.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 7.0
+ ]
+}
--- /dev/null
+{
+ "scale": 0.05829785391688347,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "min": 0.022708916887640953,
+ "max": 0.9911645770072937
+}
--- /dev/null
+{
+ "min": 0.0,
+ "max": 14.86595230102539
+}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 1.0352935791015625,
+ 1.976470947265625,
+ 2.9568634033203125,
+ 3.95294189453125
+ ],
+ [
+ -8.972549438476562,
+ 9.976470947265625,
+ -11.011764526367188,
+ 11.9686279296875
+ ]
+ ],
+ [
+ [
+ 5.0039215087890625,
+ 6.023530960083008,
+ 7.035295486450195,
+ 8.01568603515625
+ ],
+ [
+ 13.027450561523438,
+ -14.023529052734375,
+ 14.988235473632812,
+ -16.0313720703125
+ ]
+ ]
+ ]
+ ]
+}
--- /dev/null
+{
+ "weights": [
+ 2985,
+ 5473,
+ 7578,
+ 9382
+ ],
+ "scale": [
+ 0.0003349798455903035,
+ 0.0003654325561959198,
+ 0.00039588526680153606,
+ 0.00042633797740715233
+ ]
+}
--- /dev/null
+{
+ "scale": 0.003882720833644271,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 116,
+ 170,
+ 137,
+ 182
+ ],
+ [
+ 0,
+ 255,
+ 0,
+ 255
+ ]
+ ],
+ [
+ [
+ 162,
+ 213,
+ 177,
+ 219
+ ],
+ [
+ 255,
+ 0,
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.08627450980392157,
+ 0.09411764705882353,
+ 0.10196078431372549,
+ 0.10980392156862745
+ ],
+ "zero_point": [
+ 104.0,
+ 149.0,
+ 108.0,
+ 146.0
+ ],
+ "min": [
+ -8.972549019607843,
+ -14.023529411764706,
+ -11.011764705882353,
+ -16.031372549019608
+ ],
+ "max": [
+ 13.027450980392157,
+ 9.976470588235294,
+ 14.988235294117647,
+ 11.968627450980392
+ ]
+}
--- /dev/null
+{
+ "scale": 0.07756166160106659,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "min": 0.003264044094830751,
+ "max": 0.9900938200950622
+}
--- /dev/null
+{
+ "min": 0.0,
+ "max": 19.778222274780273
+}
--- /dev/null
+{
+ "weights": [
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608020782471,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.968626976013184,
+ 4.015686988830566,
+ -2.007843017578125,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608020782471,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.968626976013184,
+ 4.015686988830566,
+ -2.007843017578125,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608020782471,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.968626976013184,
+ 4.015686988830566,
+ -2.007843017578125,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608020782471,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.968626976013184,
+ 4.015686988830566,
+ -2.007843017578125,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ]
+ ]
+}
--- /dev/null
+{
+ "weights": [
+ 4099,
+ -8199,
+ -12298,
+ 16398
+ ],
+ "scale": [
+ 0.00024393631821001058,
+ 0.00024393631821001058,
+ 0.00024393631821001058,
+ 0.00024393631821001058
+ ]
+}
--- /dev/null
+{
+ "scale": 0.003887734841555357,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "scale": 0.061938945204019547,
+ "zero_point": 171.0
+}
--- /dev/null
+{
+ "weights": [
+ [
+ 144,
+ 160,
+ 80,
+ 64,
+ 48,
+ 224,
+ 16,
+ 255,
+ 192,
+ 96,
+ 176,
+ 112,
+ 1,
+ 32,
+ 240,
+ 208
+ ],
+ [
+ 144,
+ 160,
+ 80,
+ 64,
+ 48,
+ 224,
+ 16,
+ 255,
+ 192,
+ 96,
+ 176,
+ 112,
+ 1,
+ 32,
+ 240,
+ 208
+ ],
+ [
+ 144,
+ 160,
+ 80,
+ 64,
+ 48,
+ 224,
+ 16,
+ 255,
+ 192,
+ 96,
+ 176,
+ 112,
+ 1,
+ 32,
+ 240,
+ 208
+ ],
+ [
+ 144,
+ 160,
+ 80,
+ 64,
+ 48,
+ 224,
+ 16,
+ 255,
+ 192,
+ 96,
+ 176,
+ 112,
+ 1,
+ 32,
+ 240,
+ 208
+ ]
+ ],
+ "scale": [
+ 0.06274509803921569,
+ 0.06274509803921569,
+ 0.06274509803921569,
+ 0.06274509803921569
+ ],
+ "zero_point": [
+ 128.0,
+ 128.0,
+ 128.0,
+ 128.0
+ ],
+ "min": [
+ -8.031372549019608,
+ -8.031372549019608,
+ -8.031372549019608,
+ -8.031372549019608
+ ],
+ "max": [
+ 7.968627450980392,
+ 7.968627450980392,
+ 7.968627450980392,
+ 7.968627450980392
+ ]
+}
--- /dev/null
+{
+ "min": 0.010438590832054616,
+ "max": 0.9913724160194397
+}
--- /dev/null
+{
+ "min": -10.584291763305664,
+ "max": 5.210139312744141
+}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 0.960784912109375,
+ 2.0588245391845703
+ ],
+ [
+ -3.0196075439453125,
+ -3.980391502380371
+ ],
+ [
+ 4.9411773681640625,
+ -6.039215087890625
+ ]
+ ],
+ [
+ [
+ 7.0,
+ 7.960784912109375
+ ],
+ [
+ -9.058823585510254,
+ -10.019607543945312
+ ],
+ [
+ 10.980392456054688,
+ -11.941176414489746
+ ]
+ ],
+ [
+ [
+ 13.039216995239258,
+ 14.000001907348633
+ ],
+ [
+ -14.960784912109375,
+ -16.05882453918457
+ ],
+ [
+ 17.019607543945312,
+ -17.980392456054688
+ ]
+ ]
+ ]
+ ]
+}
--- /dev/null
+{
+ "scale": 0.0038701011799275875,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 138,
+ 146
+ ],
+ [
+ 109,
+ 102
+ ],
+ [
+ 167,
+ 87
+ ]
+ ],
+ [
+ [
+ 182,
+ 189
+ ],
+ [
+ 65,
+ 58
+ ],
+ [
+ 211,
+ 44
+ ]
+ ],
+ [
+ [
+ 226,
+ 233
+ ],
+ [
+ 22,
+ 14
+ ],
+ [
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.13725490196078433
+ ],
+ "zero_point": [
+ 131.0
+ ],
+ "min": [
+ -17.980392156862745
+ ],
+ "max": [
+ 17.019607843137255
+ ]
+}
--- /dev/null
+{
+ "scale": 0.25486624240875244,
+ "zero_point": 178.0
+}
--- /dev/null
+{
+ "min": 0.006121497452259064,
+ "max": 0.9868757891654968
+}
--- /dev/null
+{
+ "min": -45.46586318969727,
+ "max": 19.525028419494628
+}
+addTest(Conv2D_004 channel uint8)
addTest(Conv2D_004 layer uint8)
+addTest(DepthwiseConv2D_002 channel uint8)
addTest(DepthwiseConv2D_002 layer uint8)
+addTest(FullyConnected_003 channel uint8)
addTest(FullyConnected_003 layer uint8)
+addTest(TransposeConv_001 channel uint8)
addTest(TransposeConv_001 layer uint8)
--- /dev/null
+0.19242816,0.44059092,0.06788187,0.04543579,0.14106855,0.6858487 ,0.6214997 ,0.31582046,0.859484 ,0.3664256 ,0.86936104,0.871024 ,0.68752515,0.5296719 ,0.99137205,0.02956272,0.14838405,0.69830126,0.22359788,0.9060323 ,0.7141239 ,0.5573066 ,0.96645916,0.11426282
--- /dev/null
+0.57016104,0.2788207 ,0.8045938 ,0.7589986 ,0.81506515,0.8411593 ,0.4162234 ,0.1664247 ,0.5584996 ,0.7799966 ,0.4213713 ,0.97587234,0.79440975,0.5089373 ,0.90030503,0.78015554,0.10080549,0.5115089 ,0.77238286,0.9580212 ,0.8758745 ,0.14367636,0.4304664 ,0.55175275
--- /dev/null
+0.6224246 ,0.30448085,0.29629433,0.44483584,0.30473125,0.6186932 ,0.45563242,0.5394331 ,0.22901213,0.4313142 ,0.4019574 ,0.02263176,0.3806077 ,0.27828163,0.23962335,0.26323524,0.6125012 ,0.5459546 ,0.6340052 ,0.19074932,0.2216875 ,0.77709603,0.03312786,0.02945002
--- /dev/null
+0.7524557 ,0.5408983 ,0.07039106,0.5143847 ,0.04857475,0.7305833 ,0.36986747,0.42291477,0.90452653,0.43744263,0.24857366,0.7537328 ,0.04559262,0.65276045,0.3851062 ,0.49503985,0.37213495,0.10627239,0.7085863 ,0.1913133 ,0.08057284,0.31767172,0.9685745 ,0.5942544
--- /dev/null
+0.16251074,0.5574537 ,0.5857036 ,0.877607 ,0.29711136,0.02456062,0.8250261 ,0.21300122,0.5064036 ,0.5882086 ,0.7736793 ,0.09394809,0.98618525,0.6611699 ,0.5001983 ,0.06507304,0.88984424,0.57143325,0.07953393,0.02649987,0.9283147 ,0.65522593,0.18371649,0.12332761
--- /dev/null
+0.4383064 ,0.8700848 ,0.86010957,0.08396256,0.7963264 ,0.4156023 ,0.28146362,0.82196397,0.9921972 ,0.09969576,0.23987265,0.6734369 ,0.5469574 ,0.20805728,0.32639247,0.76773816
--- /dev/null
+0.4565062 ,0.92036587,0.47286046,0.18118097,0.5347498 ,0.91550153,0.300375 ,0.00581101,0.38686675,0.91085213,0.07278002,0.35556316,0.13014294,0.7274307 ,0.13867259,0.27517235
--- /dev/null
+0.6900174 ,0.28745306,0.30255774,0.5095008 ,0.6689176 ,0.4914624 ,0.92629427,0.504829 ,0.33514255,0.49005315,0.08569656,0.60965323,0.82193315,0.12380831,0.06971261,0.8822662
--- /dev/null
+0.4240734 ,0.5430392 ,0.7536325 ,0.46065134,0.00315792,0.02719985,0.7080977 ,0.24389206,0.8114604 ,0.13292362,0.346597 ,0.70247084,0.55753845,0.01969242,0.82950485,0.66249627
--- /dev/null
+0.31586212,0.19079527,0.9161567 ,0.8614566 ,0.9018915 ,0.34651542,0.62554437,0.05542602,0.8268219 ,0.38112178,0.9396123 ,0.49426383,0.8034765 ,0.72456217,0.5404088 ,0.8512237
--- /dev/null
+0.12934422,0.01033248,0.85648465,0.77248603,0.5128501 ,0.2453174 ,0.05065866,0.6601359 ,0.984665 ,0.57697976,0.58360994,0.79360527,0.90097004,0.26150337,0.1575109 ,0.9711614
--- /dev/null
+0.23895125,0.30275205,0.9916519 ,0.52355504,0.2577219 ,0.03600567,0.75446343,0.8064663 ,0.07550113,0.919774 ,0.84333146,0.48820078,0.31365713,0.97172034,0.7472666 ,0.66353893
--- /dev/null
+0.6186688 ,0.4357826 ,0.63239735,0.64489084,0.17722449,0.7146202 ,0.5182415 ,0.45549247,0.21316396,0.9769707 ,0.18412311,0.05855984,0.6755795 ,0.8516815 ,0.20649713,0.32990783
--- /dev/null
+0.15501449,0.67026544,0.2957976 ,0.95577955,0.6215903 ,0.2029572 ,0.6069057 ,0.60434276,0.01298514,0.66787016,0.02053251,0.34120578,0.63562113,0.9166186 ,0.7134427 ,0.95491254
--- /dev/null
+0.46877268,0.36748132,0.09441566,0.4476946 ,0.08834982,0.5387882 ,0.8359256 ,0.4374628 ,0.3835091 ,0.3577151 ,0.49470654,0.6017202 ,0.3546875 ,0.64218026,0.69008195,0.37631917
--- /dev/null
+0.5177879 ,0.10991199,0.19134527,0.25834408,0.16297385,0.5499753 ,0.8782323 ,0.74750453,0.16825114,0.72425395,0.68458 ,0.9399099 ,0.81214494,0.73325175,0.6407931 ,0.02865177,0.04341139,0.44781777,0.59848577,0.72099334,0.654926 ,0.93810713,0.5193446 ,0.8657371 ,0.50826824,0.10122011,0.6946167 ,0.5009533 ,0.27305812,0.7708204 ,0.14410722,0.7092205
--- /dev/null
+0.57410187,0.5534829 ,0.434663 ,0.55580896,0.9040647 ,0.16827786,0.82538676,0.25387943,0.7611494 ,0.49195638,0.00602222,0.20389748,0.541152 ,0.962896 ,0.37785006,0.9330408 ,0.9868882 ,0.57428783,0.830525 ,0.67987496,0.5576374 ,0.4303 ,0.8442439 ,0.21868347,0.45653513,0.7913927 ,0.31475154,0.6723579 ,0.5749264 ,0.07061622,0.6450232 ,0.52825755
--- /dev/null
+0.49751657,0.3004485 ,0.11624487,0.17704253,0.9022095 ,0.24667789,0.9204152 ,0.09801941,0.9194739 ,0.35418576,0.36659864,0.4962548 ,0.83799136,0.58057517,0.2948883 ,0.28411615,0.14429809,0.8460358 ,0.7026028 ,0.25956342,0.5251088 ,0.06569998,0.01754393,0.45209908,0.95638806,0.6044543 ,0.17229715,0.6828144 ,0.8684328 ,0.5829665 ,0.1456113 ,0.3334334
--- /dev/null
+0.00850414,0.5746211 ,0.7659193 ,0.8643168 ,0.36803156,0.08386383,0.76002747,0.19255683,0.05220222,0.18169314,0.88597506,0.6793377 ,0.45955214,0.16984127,0.5275391 ,0.910098 ,0.64607793,0.3997594 ,0.38601097,0.40899974,0.10289235,0.896202 ,0.22364503,0.30232555,0.11873382,0.07853477,0.20674925,0.35148785,0.02880615,0.09937044,0.4382221 ,0.53562754
--- /dev/null
+0.8097857 ,0.4602844 ,0.01609277,0.7885611 ,0.9090256 ,0.75475484,0.98657864,0.5927874 ,0.73494065,0.374227 ,0.23557834,0.6020654 ,0.0122237 ,0.37126908,0.38277507,0.67635936,0.4139088 ,0.8625733 ,0.37775922,0.15304309,0.6196326 ,0.4827059 ,0.76868814,0.5530773 ,0.3336473 ,0.11217184,0.5877591 ,0.5325879 ,0.48493427,0.6317438 ,0.9385114 ,0.02825027
#include "RecordMinMax.h"
#include "RecordFunction.h"
-#include "CircleExpContract.h"
#include "MinMaxObserver.h"
#include "HDF5Importer.h"
#include <luci/Importer.h>
#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
#include <luci/IR/CircleQuantParam.h>
#include <algorithm>
}
std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
std::istreambuf_iterator<char>());
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+ model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ throw std::runtime_error("ERROR: Failed to verify circle '" + input_model_path + "'");
+ }
+
_module = luci::Importer().importModule(circle::GetModel(model_data.data()));
if (_module == nullptr)
{
// Export to output Circle file
luci::CircleExporter exporter;
- CircleExpContract contract(_module.get(), output_model_path);
+
+ luci::CircleFileExpContract contract(_module.get(), output_model_path);
if (!exporter.invoke(&contract))
{
+nnas_find_package(Protobuf QUIET)
+
+if(NOT Protobuf_FOUND)
+  message(STATUS "Build souschef: FAILED (missing Protobuf)")
+ return()
+endif(NOT Protobuf_FOUND)
+
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_library(souschef STATIC ${SOURCES})
set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(souschef PUBLIC include)
+target_link_libraries(souschef PUBLIC libprotobuf)
#include <vector>
+#include <google/protobuf/repeated_field.h>
+
namespace souschef
{
std::vector<T> _vec;
};
+template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+ std::vector<T> res;
+ for (const auto &elem : field)
+ {
+ res.emplace_back(elem);
+ }
+ return res;
+}
+
+template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+ return Dataset<T>(as_vector<T>(field));
+}
+
} // namespace souschef
#endif // __SOUSCHEF_DATASET_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SOUSCHEF_DIMS_H__
+#define __SOUSCHEF_DIMS_H__
+
+#include <functional>
+#include <numeric>
+#include <vector>
+
+namespace souschef
+{
+
+template <typename T> using Dims = std::vector<T>;
+
+template <typename SHAPETYPE> Dims<int32_t> as_dims(const SHAPETYPE &shape)
+{
+ std::vector<int32_t> res;
+
+ for (auto &dim : shape.dim())
+ {
+ res.emplace_back(static_cast<int32_t>(dim));
+ }
+
+ return res;
+}
+
+int32_t element_count(const Dims<int32_t> &dims)
+{
+ return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
+}
+
+} // namespace souschef
+
+#endif // __SOUSCHEF_DIMS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SOUSCHEF_TENSOR_FILLER_H__
+#define __SOUSCHEF_TENSOR_FILLER_H__
+
+#include <map>
+#include <vector>
+
+namespace souschef
+{
+
+class TensorFiller
+{
+public:
+ virtual ~TensorFiller() = default;
+
+ /**
+ * @brief This will record the tensor by index, if it needs filler option,
+ * such as kernel, bias.
+ */
+ void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
+
+ /**
+ * @brief This will store int32 filler values such as reshape information for the tensor
+ */
+ void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
+ {
+ _tensor_filler_vint32[tensor_index] = expvalues;
+ }
+
+ void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
+ {
+ _tensor_filler_vfloat[tensor_index] = expvalues;
+ }
+
+ /**
+ * @brief This will return true if the tensor by index, needs a filler option.
+ */
+ bool get_tensor_filler(uint32_t tensor_index)
+ {
+ auto it = _tensor_filler.find(tensor_index);
+ if (it != _tensor_filler.end())
+ {
+ return it->second;
+ }
+ return false;
+ }
+
+ /**
+ * @brief This will return true if the tensor by index, needs a int array filler option.
+ */
+ bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
+ {
+ auto it = _tensor_filler_vint32.find(tensor_index);
+ if (it != _tensor_filler_vint32.end())
+ {
+ expvalues = it->second;
+ return true;
+ }
+ return false;
+ }
+
+ bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
+ {
+ auto it = _tensor_filler_vfloat.find(tensor_index);
+ if (it != _tensor_filler_vfloat.end())
+ {
+ expvalues = it->second;
+ return true;
+ }
+ return false;
+ }
+
+private:
+ std::map<uint32_t, bool> _tensor_filler{};
+ std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
+ std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
+};
+
+} // namespace souschef
+
+#endif // __SOUSCHEF_TENSOR_FILLER_H__
* limitations under the License.
*/
-#include "CircleExpContract.h"
+#include "souschef/Dims.h"
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
-{
- if (!ptr)
- INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
-
- std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
- fs.write(ptr, size);
-
- return fs.good();
-}
+// NOTE Do NOT delete this file; this file checks the completeness of 'Dims.h'
#include "OpChefs.h"
#include <souschef/Dataset.h>
+#include <souschef/Dims.h>
#include "Log.h"
#include <sstream>
#include <stdexcept>
-namespace
-{
-
using namespace souschef;
-template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
- std::vector<T> res;
- for (const auto &elem : field)
- {
- res.emplace_back(elem);
- }
- return res;
-}
-
-template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
- return Dataset<T>(as_vector<T>(field));
-}
-
-} // namespace
-
-namespace
-{
-
-template <typename T> using Dims = std::vector<T>;
-
-Dims<int32_t> as_dims(const tflchef::TensorShape &shape)
-{
- std::vector<int32_t> res;
-
- for (auto &dim : shape.dim())
- {
- res.emplace_back(static_cast<int32_t>(dim));
- }
-
- return res;
-}
-
-int32_t element_count(const Dims<int32_t> &dims)
-{
- return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
-}
-
-} // namespace
-
namespace
{
* limitations under the License.
*/
-#include "CircleExpContract.h"
+#include "NonMaxSuppressionV5.h"
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-namespace record_minmax
-{
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+flatbuffers::Offset<void> NonMaxSuppressionV5Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
- if (!ptr)
- INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+ tflite::NonMaxSuppressionV5OptionsBuilder options_builder{fbb};
- std::ofstream fs(_filepath, std::ofstream::binary);
- fs.write(ptr, size);
-
- return fs.good();
+ return options_builder.Finish().Union();
}
-} // namespace record_minmax
+std::unique_ptr<OpChef>
+NonMaxSuppressionV5ChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new NonMaxSuppressionV5Chef{operation}};
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_NON_MAX_SUPPRESSION_V5_H__
+#define __OP_NON_MAX_SUPPRESSION_V5_H__
+
+#include "OpChef.h"
+
+class NonMaxSuppressionV5Chef final : public OpChef
+{
+public:
+ explicit NonMaxSuppressionV5Chef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_NON_MAX_SUPPRESSION_V5;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_NonMaxSuppressionV5Options;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct NonMaxSuppressionV5ChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_NON_MAX_SUPPRESSION_V5_H__
OP_CHEF(Mul, MulChefFactory)
OP_CHEF(Neg, NegChefFactory)
OP_CHEF(NonMaxSuppressionV4, NonMaxSuppressionV4ChefFactory)
+OP_CHEF(NonMaxSuppressionV5, NonMaxSuppressionV5ChefFactory)
OP_CHEF(NotEqual, NotEqualChefFactory)
OP_CHEF(OneHot, OneHotChefFactory)
OP_CHEF(Pack, PackChefFactory)
#include "Op/Mul.h"
#include "Op/Neg.h"
#include "Op/NonMaxSuppressionV4.h"
+#include "Op/NonMaxSuppressionV5.h"
#include "Op/NotEqual.h"
#include "Op/OneHot.h"
#include "Op/Pack.h"
// None
}
+message NonMaxSuppressionV5Options {
+ // None
+}
+
message NotEqualOptions {
// None
}
// HardSwishOptions 196
optional DepthToSpaceOptions depth_to_space_options = 197;
optional NonMaxSuppressionV4Options non_max_suppression_v4_options = 198;
- // NonMaxSuppressionV5Options 199
+ optional NonMaxSuppressionV5Options non_max_suppression_v5_options = 199;
optional ScatterNdOptions scatter_nd_options = 200;
optional NotEqualOptions notequal_options = 201;
optional ExpandDimsOptions expand_dims_options = 202;
target_link_libraries(tflchef_tflite mio_tflite)
target_link_libraries(tflchef_tflite stdex)
target_link_libraries(tflchef_tflite cwrap)
+target_link_libraries(tflchef_tflite souschef)
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV5.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *max_output_size_tensor = import->tensors()->Get(inputs[2]);
+ assert(max_output_size_tensor->type() == tflite::TensorType::TensorType_INT32);
+
+ const tflite::Tensor *iou_threshold_tensor = import->tensors()->Get(inputs[3]);
+ assert(iou_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ const tflite::Tensor *score_threshold_tensor = import->tensors()->Get(inputs[4]);
+ assert(score_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ const tflite::Tensor *soft_nms_sigma_tensor = import->tensors()->Get(inputs[5]);
+ assert(soft_nms_sigma_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ for (int32_t index = 2; index < 6; ++index)
+ {
+ fill_tensor_to_import(index, import);
+ }
+}
+
+tflchef::Operation *TFliteOpNonMaxSuppressionV5::build(const tflite::Operator *op,
+ TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("NonMaxSuppressionV5");
+
+ return operation;
+}
+
+} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
+#define __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for NON_MAX_SUPPRESSION_V5
+ */
+class TFliteOpNonMaxSuppressionV5 : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
#include <mio/tflite/schema_generated.h>
+#include <souschef/TensorFiller.h>
+
#include <tflchef.pb.h>
#include <map>
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
-class TFliteImport
+class TFliteImport : public souschef::TensorFiller
{
public:
TFliteImport(const tflite::Model *model);
std::string opcode_name(const tflite::Operator *op) const;
size_t buffer_info(const tflite::Tensor *tensor, const uint8_t **buff_data);
- /**
- * @brief This will record the tensor by index, if it needs filler option,
- * such as kernel, bias.
- */
- void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
-
- /**
- * @brief This will store int32 filler values such as reshape information for the tensor
- */
- void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
- {
- _tensor_filler_vint32[tensor_index] = expvalues;
- }
-
- void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
- {
- _tensor_filler_vfloat[tensor_index] = expvalues;
- }
-
- /**
- * @brief This will return true if the tensor by index, needs a filler option.
- */
- bool get_tensor_filler(uint32_t tensor_index)
- {
- auto it = _tensor_filler.find(tensor_index);
- if (it != _tensor_filler.end())
- {
- return it->second;
- }
- return false;
- }
-
- /**
- * @brief This will return true if the tensor by index, needs a int array filler option.
- */
- bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
- {
- auto it = _tensor_filler_vint32.find(tensor_index);
- if (it != _tensor_filler_vint32.end())
- {
- expvalues = it->second;
- return true;
- }
- return false;
- }
-
- bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
- {
- auto it = _tensor_filler_vfloat.find(tensor_index);
- if (it != _tensor_filler_vfloat.end())
- {
- expvalues = it->second;
- return true;
- }
- return false;
- }
-
private:
const TFliteSubGraphs_t *_subgraphs{nullptr};
const TFliteBuffers_t *_buffers{nullptr};
std::vector<const tflite::OperatorCode *> _op_codes{};
std::vector<int32_t> _inputs{};
std::vector<int32_t> _outputs{};
-
- std::map<uint32_t, bool> _tensor_filler{};
- std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
- std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
};
} // namespace tflchef
#include "Op/Mul.h"
#include "Op/Neg.h"
#include "Op/NonMaxSuppressionV4.h"
+#include "Op/NonMaxSuppressionV5.h"
#include "Op/NotEqual.h"
#include "Op/OneHot.h"
#include "Op/Pack.h"
REG_TFL_OP(MUL, TFliteOpMul);
REG_TFL_OP(NEG, TFliteOpNeg);
REG_TFL_OP(NON_MAX_SUPPRESSION_V4, TFliteOpNonMaxSuppressionV4);
+ REG_TFL_OP(NON_MAX_SUPPRESSION_V5, TFliteOpNonMaxSuppressionV5);
REG_TFL_OP(NOT_EQUAL, TFliteOpNotEqual);
REG_TFL_OP(ONE_HOT, TFliteOpOneHot);
REG_TFL_OP(PACK, TFliteOpPack);
_op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>();
// There is no Option for NON_MAX_SUPPRESSION_V4
+ // There is no Option for NON_MAX_SUPPRESSION_V5
_op_map[tflite::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
_op_map[tflite::BuiltinOperator_PACK] = make_unique<PackPrinter>();
// There is no Option for PAD
+ // There is no Option for PADV2
// There is no Option for PRELU
// There is no Option for RELU
// There is no Option for RELU6
#include "BuildBuiltinOptions/MulOptions.h"
#include "BuildBuiltinOptions/NegOptions.h"
#include "BuildBuiltinOptions/NonMaxSuppressionV4Options.h"
+#include "BuildBuiltinOptions/NonMaxSuppressionV5Options.h"
#include "BuildBuiltinOptions/NotEqualOptions.h"
#include "BuildBuiltinOptions/OneHotOptions.h"
#include "BuildBuiltinOptions/PackOptions.h"
* limitations under the License.
*/
-#include "CircleExpContract.h"
+#include "NonMaxSuppressionV5Options.h"
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+namespace tflite2circle
{
- if (!ptr)
- INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
-
- std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
- fs.write(ptr, size);
- return fs.good();
+// Builds an (empty) circle NonMaxSuppressionV5Options table; no fields are set
+// because nothing from the tflite operator needs to be carried over here
+// (hence the unnamed tflite::Operator parameter).
+flatbuffers::Offset<circle::NonMaxSuppressionV5Options>
+build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb,
+                                        const tflite::Operator *)
+{
+  circle::NonMaxSuppressionV5OptionsBuilder builtin_options_builder{fb};
+  return builtin_options_builder.Finish();
}
+
+} // namespace tflite2circle
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
+#define __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NonMaxSuppressionV5Options>
+build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
// is_variable
bool is_variable = it->is_variable();
+ flatbuffers::Offset<circle::SparsityParameters> sparsity;
+ // sparsity
+ if (it->sparsity())
+ {
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order;
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map;
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata;
+
+ // traversal_order
+ if (it->sparsity()->traversal_order())
+ {
+ auto traversal_order_vec = std::vector<int32_t>{
+ it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
+ traversal_order = fb->CreateVector(traversal_order_vec);
+ }
+
+ // block_map
+ if (it->sparsity()->block_map())
+ {
+ auto block_map_vec = std::vector<int32_t>{it->sparsity()->block_map()->begin(),
+ it->sparsity()->block_map()->end()};
+ block_map = fb->CreateVector(block_map_vec);
+ }
+
+ // dim_metadata
+ std::vector<flatbuffers::Offset<circle::DimensionMetadata>> dim_metadata_vec;
+ auto tflite_dim_metadata = it->sparsity()->dim_metadata();
+ for (auto it : *tflite_dim_metadata)
+ {
+ // array_segments
+ auto tflite_array_segments_type = it->array_segments_type();
+ auto circle_array_segments =
+ get_circle_sparse_index_vector(*fb, it, tflite_array_segments_type);
+ auto circle_array_segments_type =
+ get_circle_sparse_index_vector_type(tflite_array_segments_type);
+
+ // array_indices
+ auto tflite_array_indices_type = it->array_indices_type();
+ auto circle_array_indices =
+ get_circle_sparse_index_vector(*fb, it, tflite_array_indices_type);
+ auto circle_array_indices_type =
+ get_circle_sparse_index_vector_type(tflite_array_indices_type);
+
+ auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb};
+
+ circle_dim_metadata_builder.add_format(get_circle_dimension_type(it->format()));
+ circle_dim_metadata_builder.add_dense_size(it->dense_size());
+ circle_dim_metadata_builder.add_array_segments(circle_array_segments);
+ circle_dim_metadata_builder.add_array_segments_type(circle_array_segments_type);
+ circle_dim_metadata_builder.add_array_indices(circle_array_indices);
+ circle_dim_metadata_builder.add_array_indices_type(circle_array_indices_type);
+ auto dim_metadata = circle_dim_metadata_builder.Finish();
+ dim_metadata_vec.emplace_back(dim_metadata);
+ }
+ dim_metadata = fb->CreateVector(dim_metadata_vec);
+
+ sparsity = circle::CreateSparsityParameters(*fb, traversal_order, block_map, dim_metadata);
+ }
+
+ // shape signature
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature;
+ if (it->shape_signature())
+ {
+ auto shape_signature_vec =
+ std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
+ shape_signature = fb->CreateVector(shape_signature_vec);
+ }
+
circle::TensorBuilder tensor_builder{*fb};
tensor_builder.add_shape(shape);
tensor_builder.add_type(get_circle_tensortype(it->type()));
tensor_builder.add_name(name);
tensor_builder.add_quantization(quantization);
tensor_builder.add_is_variable(is_variable);
+ tensor_builder.add_sparsity(sparsity);
+ tensor_builder.add_shape_signature(shape_signature);
auto tensor = tensor_builder.Finish();
tensor_vec.emplace_back(tensor);
}
: _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
{
const tflite::Model *tfl_model = model.load_model();
+ // verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model._data.data()),
+ model._data.size()};
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ throw std::runtime_error("ERROR: Failed to verify tflite");
+ }
+
_operator_codes_offset =
std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
_subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
}
}
+// Maps a tflite::DimensionType to the equivalent circle::DimensionType.
+// Any value other than DENSE / SPARSE_CSR is rejected with a runtime_error.
+circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type)
+{
+  switch (tfl_dim_type)
+  {
+    case tflite::DimensionType_DENSE:
+      return circle::DimensionType_DENSE;
+    case tflite::DimensionType_SPARSE_CSR:
+      return circle::DimensionType_SPARSE_CSR;
+    default:
+      throw std::runtime_error("tflite2circle: wrong dimension type.");
+  }
+}
+
+// Copies one tflite sparse-index vector (Int32 / Uint16 / Uint8) into a circle
+// vector of the same element type and returns it as a type-erased union
+// offset. Pair the result with get_circle_sparse_index_vector_type() when
+// adding it to a DimensionMetadata.
+//
+// NOTE(review): this function always reads dm->array_segments_as_*(),
+// regardless of which field the caller is translating. When invoked for
+// array_indices (passing the indices' type tag, as the sparsity hunk above
+// does), it copies the *segments* payload instead of the indices -- and may
+// dereference a null pointer if the segments field uses a different vector
+// type than the tag says. Verify against callers; a fix likely needs the
+// source vector passed in explicitly.
+flatbuffers::Offset<void>
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+                               const tflite::DimensionMetadata *dm,
+                               const tflite::SparseIndexVector &tfl_sparse_index_vector_type)
+{
+  switch (tfl_sparse_index_vector_type)
+  {
+    case tflite::SparseIndexVector_NONE:
+      return flatbuffers::Offset<void>();
+    case tflite::SparseIndexVector_Int32Vector:
+    {
+      auto values_vec_int32 =
+          std::vector<int32_t>{dm->array_segments_as_Int32Vector()->values()->begin(),
+                               dm->array_segments_as_Int32Vector()->values()->end()};
+      auto values_int32 = fb.CreateVector(values_vec_int32);
+      circle::Int32VectorBuilder int32_vector_builder{fb};
+      int32_vector_builder.add_values(values_int32);
+      return int32_vector_builder.Finish().Union();
+    }
+    case tflite::SparseIndexVector_Uint16Vector:
+    {
+      auto values_vec_uint16 =
+          std::vector<uint16_t>{dm->array_segments_as_Uint16Vector()->values()->begin(),
+                                dm->array_segments_as_Uint16Vector()->values()->end()};
+      auto values_uint16 = fb.CreateVector(values_vec_uint16);
+      circle::Uint16VectorBuilder uint16_vector_builder{fb};
+      uint16_vector_builder.add_values(values_uint16);
+      return uint16_vector_builder.Finish().Union();
+    }
+    case tflite::SparseIndexVector_Uint8Vector:
+    {
+      auto values_vec_uint8 =
+          std::vector<uint8_t>{dm->array_segments_as_Uint8Vector()->values()->begin(),
+                               dm->array_segments_as_Uint8Vector()->values()->end()};
+      auto values_uint8 = fb.CreateVector(values_vec_uint8);
+      circle::Uint8VectorBuilder uint8_vector_builder{fb};
+      uint8_vector_builder.add_values(values_uint8);
+      return uint8_vector_builder.Finish().Union();
+    }
+    default:
+      throw std::runtime_error("tflite2circle: wrong SparseIndexVector type.");
+  }
+}
+
+// Maps a tflite SparseIndexVector type tag to its circle counterpart
+// (companion to get_circle_sparse_index_vector, which builds the payload).
+circle::SparseIndexVector
+get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type)
+{
+  switch (tfl_sparse_index_vector_type)
+  {
+    case tflite::SparseIndexVector_NONE:
+      return circle::SparseIndexVector_NONE;
+    case tflite::SparseIndexVector_Int32Vector:
+      return circle::SparseIndexVector_Int32Vector;
+    case tflite::SparseIndexVector_Uint16Vector:
+      return circle::SparseIndexVector_Uint16Vector;
+    case tflite::SparseIndexVector_Uint8Vector:
+      return circle::SparseIndexVector_Uint8Vector;
+    default:
+      throw std::runtime_error("tflite2circle: wrong SparseIndexVector type.");
+  }
+}
+
} // namespace tflite2circle
*/
circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode);
+/**
+ * @brief Returns circle DimensionType according to tflite.
+ */
+circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type);
+
+/**
+ * @brief Returns circle SparseIndexVector according to tflite.
+ */
+flatbuffers::Offset<void>
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::DimensionMetadata *dm,
+ const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
+
+/**
+ * @brief Returns circle SparseIndexVector type according to tflite.
+ */
+circle::SparseIndexVector
+get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
+
} // namespace tflite2circle
#endif // __DATA_LOOKUP_H__
TFL_BUILTIN_OPTIONS(WhileOptions)
TFL_BUILTIN_OPTIONS(DepthToSpaceOptions)
TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options)
-//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
+TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
TFL_BUILTIN_OPTIONS(RankOptions)
TFL_BUILTIN_OPTIONS(ScatterNdOptions)
TFL_BUILTIN_OPTIONS(SegmentSumOptions)
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x0000000000080001)
+ set(VCONONE_VERSION 0x0000000000090001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
const size_t num_of_kernels = axis.size();
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1);
+
// Create temporary tensor infos
auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
int32_t reverse_scaling_divisor;
int32_t reverse_scaling_right_shift;
int diff_min;
+ int32_t zero_point;
+ float scale;
+ float *table;
};
struct PackParams
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#ifndef __NNFW_CKER_EIGEN_EIGEN_CONVOLUTION_HELPERS_H__
#define __NNFW_CKER_EIGEN_EIGEN_CONVOLUTION_HELPERS_H__
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#ifndef __NNFW_CKER_EGIEN_EIGEN_SPATIAL_CONVOLUTIONS_H__
#define __NNFW_CKER_EGIEN_EIGEN_SPATIAL_CONVOLUTIONS_H__
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
#ifndef __NNFW_CKER_NEON_CHECK_H__
#define __NNFW_CKER_NEON_CHECK_H__
{
// TODO Change to apply neon for this function if it is faster
-inline void AveragePool(const PoolParams ¶ms, const Shape &input_shape, const float *input_data,
+// Primary template: AveragePool is only provided for the specializations
+// below (float, uint8_t); any other arithmetic type is rejected at runtime,
+// and non-arithmetic types fail to compile via the static_assert.
+template <typename T>
+void AveragePool(const PoolParams &, const Shape &, const T *, const Shape &, T *)
+{
+  static_assert(std::is_integral<T>::value || std::is_floating_point<T>::value,
+                "cker::AveragePool : This function supports only integer or floating point");
+  throw std::runtime_error("cker::AveragePool : Unsupported data type");
+}
+
+template <>
+void AveragePool<float>(const PoolParams ¶ms, const Shape &input_shape, const float *input_data,
const Shape &output_shape, float *output_data)
{
assert(input_shape.DimensionsCount() == 4);
}
}
-inline void AveragePool(const PoolParams ¶ms, const Shape &input_shape,
- const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+template <>
+void AveragePool<uint8_t>(const PoolParams ¶ms, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &output_shape,
+ uint8_t *output_data)
{
if (params.filter_height * params.filter_width > 16 * 16)
{
#include "cker/Utils.h"
#include "cker/operation/reference/Conv.h"
#include "cker/operation/optimized/Conv.h"
+#include <iostream>
#include <vector>
namespace nnfw
class Conv
{
public:
- Conv()
- : _modified_filter_data(), _im2col_data(), _im2col_shape(4), _need_im2col(false),
- _prepared(false)
- {
- }
+ Conv() : _modified_filter_data(), _im2col_shape(4), _need_im2col(false), _prepared(false) {}
void prepare(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
- bool &is_replaced_weights)
+ bool &is_replaced_weights, uint32_t dilationWidthFactor,
+ uint32_t dilationHeightFactor)
{
if (!_prepared)
{
- if (usableMultiThreaded(padding_type))
+ if (usableMultiThreaded(padding_type, dilationWidthFactor, dilationHeightFactor))
{
transposeFilter(filter_shape, filter_data, is_replaced_weights);
}
const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
const float *bias_data, const Shape &output_shape, float *output_data)
{
- if (usableMultiThreaded(params.padding_type))
+ if (usableMultiThreaded(params.padding_type, params.dilation_width_factor,
+ params.dilation_height_factor))
{
bool transposed_in_execution = false;
if (!_prepared)
params.stride_height);
}
- uint8_t *im2col_raw_data = _im2col_data.data();
- optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
- bias_data, output_shape, output_data, _im2col_shape, im2col_raw_data);
+ int im2col_size = _need_im2col ? _im2col_shape.FlatSize() : 1;
+
+ // Use heap if size is larger than 8MB
+ if (im2col_size > 8 * 1024 * 1024)
+ {
+ std::unique_ptr<uint8_t[]> im2col_data = std::make_unique<uint8_t[]>(im2col_size);
+ optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+ bias_data, output_shape, output_data, _im2col_shape, im2col_data.get());
+ }
+ else
+ {
+ uint8_t im2col_data[im2col_size];
+ optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+ bias_data, output_shape, output_data, _im2col_shape, im2col_data);
+ }
}
private:
- bool usableMultiThreaded(PaddingType padding_type)
+ bool usableMultiThreaded(PaddingType padding_type, uint32_t dilation_width_factor,
+ int32_t dilation_height_factor)
{
- return padding_type != PaddingType::kNone && std::thread::hardware_concurrency() > 1;
+ return padding_type != PaddingType::kNone && std::thread::hardware_concurrency() > 1 &&
+ dilation_width_factor == 1 && dilation_height_factor == 1;
}
void transposeFilter(const Shape &filter_shape, const float *filter_data,
_im2col_shape.SetDim(1, output_shape.Dims(1));
_im2col_shape.SetDim(2, output_shape.Dims(2));
_im2col_shape.SetDim(3, input_shape.Dims(3) * kernel_shape.Dims(1) * kernel_shape.Dims(2));
- _im2col_data.resize(_im2col_shape.FlatSize());
}
}
private:
std::vector<float> _modified_filter_data;
- std::vector<uint8_t> _im2col_data;
Shape _im2col_shape;
bool _need_im2col;
bool _prepared;
* limitations under the License.
*/
-#include "ir/operation/Sin.h"
+#ifndef __NNFW_CKER_ERF_H__
+#define __NNFW_CKER_ERF_H__
-#include <cassert>
+#include "cker/Shape.h"
-#include "ir/OperationVisitor.h"
+#include <cmath>
-namespace onert
+namespace nnfw
{
-namespace ir
+namespace cker
{
-namespace operation
-{
-
-void Sin::accept(OperationVisitor &v) const { v.visit(*this); }
-Sin::Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+// Element-wise Gauss error function: output[i] = std::erf(input[i]).
+// Input and output shapes must have matching flat sizes (enforced by
+// MatchingFlatSize).
+inline void Erf(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+                float *output_data)
{
+  const int size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < size; i++)
+  {
+    output_data[i] = std::erf(input_data[i]);
+  }
}
-} // namespace operation
-} // namespace ir
-} // namespace onert
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_ERF_H__
}
}
+// Quantized (uint8) LogSoftmax along params.axis. Uses the precomputed exp
+// lookup table in params.table (indexed relative to the row maximum) to
+// accumulate sum(exp) without dequantizing each element, then quantizes the
+// log-probabilities with params.scale / params.zero_point.
+inline void LogSoftmax(const SoftmaxParams &params, float input_scale, const Shape &input_shape,
+                       const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+{
+  const int rank = input_shape.DimensionsCount();
+  const int axis = (params.axis < 0) ? params.axis + rank : params.axis;
+  const double beta = params.beta;
+  const int depth = MatchingDim(input_shape, axis, output_shape, axis);
+
+  const int32_t clamp_max = std::numeric_limits<uint8_t>::max();
+  const int32_t clamp_min = std::numeric_limits<uint8_t>::min();
+
+  // Product of dimensions before the softmax axis...
+  int outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+
+  // ...and after it; element (i, c, j) lives at (i * depth + c) * inner_size + j.
+  int inner_size = 1;
+  for (int i = axis + 1; i < rank; ++i)
+  {
+    inner_size *= input_shape.Dims(i);
+  }
+
+  for (int i = 0; i < outer_size; ++i)
+  {
+    for (int j = 0; j < inner_size; ++j)
+    {
+      // Row maximum for numerical stability of the exp-table lookup.
+      // FIX: indices must include the inner offset `+ j`; without it every
+      // inner iteration processed only the j == 0 slice.
+      uint8_t max_val = std::numeric_limits<uint8_t>::min();
+      for (int c = 0; c < depth; ++c)
+      {
+        max_val = std::max(max_val, input_data[(i * depth + c) * inner_size + j]);
+      }
+
+      float sum_exp = 0.0f;
+      const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+      // Shift the table so table_offset[v] == exp((v - max_val) * scaled beta).
+      const float *table_offset = &params.table[max_uint8 - max_val];
+      for (int c = 0; c < depth; ++c)
+      {
+        sum_exp += table_offset[input_data[(i * depth + c) * inner_size + j]];
+      }
+      const float log_sum_exp = std::log(sum_exp);
+
+      const float scale = input_scale / params.scale;
+      const float precomputed = (input_scale * max_val * beta + log_sum_exp) / params.scale;
+      for (int c = 0; c < depth; ++c)
+      {
+        const float log_prob =
+            scale * input_data[(i * depth + c) * inner_size + j] * beta - precomputed;
+        const int32_t prob_quantized = std::rint(log_prob) + params.zero_point;
+        output_data[(i * depth + c) * inner_size + j] =
+            static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+      }
+    }
+  }
+}
+
} // namespace cker
} // namespace nnfw
namespace cker
{
-inline void MaxPool(const PoolParams ¶ms, const Shape &input_shape, const float *input_data,
+// Primary template: MaxPool is only provided for the specializations below
+// (float, uint8_t); other arithmetic types are rejected at runtime, and
+// non-arithmetic types fail to compile via the static_assert.
+template <typename T> void MaxPool(const PoolParams &, const Shape &, const T *, const Shape &, T *)
+{
+  static_assert(std::is_integral<T>::value || std::is_floating_point<T>::value,
+                "cker::MaxPool : This function supports only integer or floating point");
+  throw std::runtime_error("cker::MaxPool : Unsupported data type");
+}
+
+template <>
+void MaxPool<float>(const PoolParams ¶ms, const Shape &input_shape, const float *input_data,
const Shape &output_shape, float *output_data)
{
assert(input_shape.DimensionsCount() == 4);
}
}
-inline void MaxPool(const PoolParams ¶ms, const Shape &input_shape, const uint8_t *input_data,
- const Shape &output_shape, uint8_t *output_data)
+template <>
+void MaxPool<uint8_t>(const PoolParams ¶ms, const Shape &input_shape, const uint8_t *input_data,
+ const Shape &output_shape, uint8_t *output_data)
{
// Here, and in other pooling ops, in order to maintain locality of reference,
namespace cker
{
+// Performs softmax along the input of size (input_size * batch_size).
+// `in` is laid out as batch_size contiguous rows of input_size elements and
+// `out` uses the same layout; `beta` scales the logits before exponentiation.
+inline void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
+                    float *out)
+{
+  assert(input_size > 0);
+
+  // For each batch
+  for (int b = 0; b < batch_size; b++)
+  {
+    // Find the max coeff (subtracted below for numerical stability).
+    float max_coeff = in[0];
+    for (int i = 1; i < input_size; i++)
+    {
+      if (in[i] > max_coeff)
+        max_coeff = in[i];
+    }
+
+    // Compute the normalized sum of exps.
+    float exp_sum = 0.0;
+    for (int i = 0; i < input_size; i++)
+    {
+      out[i] = std::exp((in[i] - max_coeff) * beta);
+      exp_sum += out[i];
+    }
+
+    // Divide by the sum of exps.
+    float reciprocal_sum_exp = 1.f / exp_sum;
+    for (int i = 0; i < input_size; i++)
+    {
+      out[i] *= reciprocal_sum_exp;
+    }
+
+    // Advance in and out pointers for the next batch.
+    in += input_size;
+    out += input_size;
+  }
+}
+
inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, const float *input_data,
const Shape &output_shape, float *output_data)
{
uint8_t *output_data)
{
int i = 0;
+
+#ifdef USE_NEON
+ const uint8x8_t output_activation_min_vector = vdup_n_u8(params.quantized_activation_min);
+ const uint8x8_t output_activation_max_vector = vdup_n_u8(params.quantized_activation_max);
+ for (; i <= size - 8; i += 8)
+ {
+ const uint8x8_t input1_val_original = vld1_u8(input1_data + i);
+ const uint8x8_t input2_val_original = vld1_u8(input2_data + i);
+ const int16x8_t input1_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input1_val_original));
+ const int16x8_t input2_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input2_val_original));
+ const int16x8_t input1_val = vaddq_s16(input1_val_s16, vdupq_n_s16(params.input1_offset));
+ const int16x8_t input2_val = vaddq_s16(input2_val_s16, vdupq_n_s16(params.input2_offset));
+ const int16x4_t input1_val_high = vget_high_s16(input1_val);
+ const int16x4_t input1_val_low = vget_low_s16(input1_val);
+ const int16x4_t input2_val_high = vget_high_s16(input2_val);
+ const int16x4_t input2_val_low = vget_low_s16(input2_val);
+ int32x4_t x11 = vmovl_s16(input1_val_low);
+ int32x4_t x12 = vmovl_s16(input1_val_high);
+ int32x4_t x21 = vmovl_s16(input2_val_low);
+ int32x4_t x22 = vmovl_s16(input2_val_high);
+ const int32x4_t left_shift_dup = vdupq_n_s32(params.left_shift);
+ x11 = vshlq_s32(x11, left_shift_dup);
+ x12 = vshlq_s32(x12, left_shift_dup);
+ x21 = vshlq_s32(x21, left_shift_dup);
+ x22 = vshlq_s32(x22, left_shift_dup);
+ x11 = vqrdmulhq_n_s32(x11, params.input1_multiplier);
+ x12 = vqrdmulhq_n_s32(x12, params.input1_multiplier);
+ x21 = vqrdmulhq_n_s32(x21, params.input2_multiplier);
+ x22 = vqrdmulhq_n_s32(x22, params.input2_multiplier);
+ const int32x4_t input1_shift_dup = vdupq_n_s32(params.input1_shift);
+ const int32x4_t input2_shift_dup = vdupq_n_s32(params.input2_shift);
+ x11 = vshlq_s32(x11, input1_shift_dup);
+ x12 = vshlq_s32(x12, input1_shift_dup);
+ x21 = vshlq_s32(x21, input2_shift_dup);
+ x22 = vshlq_s32(x22, input2_shift_dup);
+ int32x4_t s1 = vaddq_s32(x11, x21);
+ int32x4_t s2 = vaddq_s32(x12, x22);
+ s1 = vqrdmulhq_n_s32(s1, params.output_multiplier);
+ s2 = vqrdmulhq_n_s32(s2, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ s1 = RoundingDivideByPOT(s1, -params.output_shift);
+ s2 = RoundingDivideByPOT(s2, -params.output_shift);
+ const int16x4_t s1_narrowed = vmovn_s32(s1);
+ const int16x4_t s2_narrowed = vmovn_s32(s2);
+ const int16x8_t s =
+ vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
+ const uint8x8_t clamped = vmax_u8(output_activation_min_vector,
+ vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
+ vst1_u8(output_data + i, clamped);
+ }
+#endif // NEON
for (; i < size; ++i)
{
- int32_t clamped_output = quant8_sum(params, input1_data[i], input2_data[i]);
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+ const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
+ const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, params.output_multiplier, params.output_shift) +
+ params.output_offset;
+ const int32_t clamped_output = std::min(params.quantized_activation_max,
+ std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
uint8_t *output_data)
{
int i = 0;
- int32_t clamped_output;
- for (; i < size; i++)
+
+#ifdef USE_NEON
+ const auto input1_offset_vector = vdupq_n_s16(params.input1_offset);
+ const auto input2_offset_vector = vdupq_n_s16(params.input2_offset);
+ const auto output_offset_vector = vdupq_n_s16(params.output_offset);
+ const auto output_activation_min_vector = vdup_n_u8(params.quantized_activation_min);
+ const auto output_activation_max_vector = vdup_n_u8(params.quantized_activation_max);
+ const int left_shift = std::max(0, params.output_shift);
+ const int right_shift = std::max(0, -params.output_shift);
+ const int32x4_t left_shift_vec = vdupq_n_s32(left_shift);
+ for (; i <= size - 8; i += 8)
+ {
+ // We load / store 8 at a time, multiplying as two sets of 4 int32s.
+ const auto input1_val_original = vld1_u8(input1_data + i);
+ const auto input2_val_original = vld1_u8(input2_data + i);
+ const auto input1_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input1_val_original));
+ const auto input2_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input2_val_original));
+ const auto input1_val = vaddq_s16(input1_val_s16, input1_offset_vector);
+ const auto input2_val = vaddq_s16(input2_val_s16, input2_offset_vector);
+
+ const auto input1_val_low = vget_low_s16(input1_val);
+ const auto input1_val_high = vget_high_s16(input1_val);
+ const auto input2_val_low = vget_low_s16(input2_val);
+ const auto input2_val_high = vget_high_s16(input2_val);
+
+ auto p1 = vmull_s16(input2_val_low, input1_val_low);
+ auto p2 = vmull_s16(input2_val_high, input1_val_high);
+
+ p1 = vshlq_s32(p1, left_shift_vec);
+ p2 = vshlq_s32(p2, left_shift_vec);
+ p1 = vqrdmulhq_n_s32(p1, params.output_multiplier);
+ p2 = vqrdmulhq_n_s32(p2, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ p1 = RoundingDivideByPOT(p1, right_shift);
+ p2 = RoundingDivideByPOT(p2, right_shift);
+
+ const auto p1_narrowed = vqmovn_s32(p1);
+ const auto p2_narrowed = vqmovn_s32(p2);
+ const auto p = vaddq_s16(vcombine_s16(p1_narrowed, p2_narrowed), output_offset_vector);
+ const auto clamped = vmax_u8(output_activation_min_vector,
+ vmin_u8(output_activation_max_vector, vqmovun_s16(p)));
+ vst1_u8(output_data + i, clamped);
+ }
+#endif // NEON
+
+ for (; i < size; ++i)
{
- clamped_output = quant8_mul(params, input1_data[i], input2_data[i]);
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t unclamped_result =
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output =
+ std::min(params.quantized_activation_max,
+ std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
}
}
+// Supports per-batch zero_byte for per-batch asymmetric quantized inputs.
+template <typename T>
+void DilatedIm2col(const ConvParams ¶ms, const Shape &input_shape, const T *input_data,
+ const Shape &filter_shape, const Shape &output_shape, T *im2col_data,
+ const int32_t *zero_bytes, const int zero_bytes_len)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ // For dilated convolution, the input pixels are not contiguous therefore we
+ // can't use the same optimizations as Im2Col(). Though note this code would
+ // work fine for the non-dilated case too (though likely a bit slower).
+ assert(dilation_width_factor != 1 || dilation_height_factor != 1);
+ assert(im2col_data);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ MatchingDim(output_shape, 3, filter_shape, 0);
+
+ // Construct the MxN sized im2col matrix.
+ // The rows M, are sub-ordered B x H x W
+ const Shape row_shape({1, batches, output_height, output_width});
+ // The columns, N, are sub-ordered Kh x Kw x Din
+ const Shape col_shape({1, filter_height, filter_width, input_depth});
+ // Use dimensions M and N to construct dims for indexing directly into im2col
+ const Shape im2col_shape({1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
+
+ // Loop through the output rows (B x H x W)
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ const T zero_byte =
+ zero_bytes_len > 1 ? static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ // Each im2col row is an output pixel. Arrange the input data in this
+ // row in an order we can conveniently multiply with the filter data.
+ int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ // Loop through all the pixels of the filter (Kh x Kw)
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ if ((in_y >= 0) && (in_y < input_height))
+ {
+ // Filter row is within the input data.
+ // Loop through all the filter pixels in this row.
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
+ T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+ if ((in_x >= 0) && (in_x < input_width))
+ {
+ // Filter pixel is within the input, copy the input data.
+ T const *src = input_data + Offset(input_shape, batch, in_y, in_x, 0);
+ memcpy(dst, src, input_depth * sizeof(T));
+ }
+ else
+ {
+ // Filter pixel is outside the input, zero it out.
+ memset(dst, zero_byte, input_depth * sizeof(T));
+ }
+ }
+ }
+ else
+ {
+ // Filter row is outside the input, zero out the entire filter row.
+ int col_offset = Offset(col_shape, 0, filter_y, 0, 0);
+ T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+ memset(dst, zero_byte, filter_width * input_depth * sizeof(T));
+ }
+ }
+ }
+ }
+ }
+}
+
template <typename T>
void DilatedIm2col(const ConvParams ¶ms, uint8_t zero_byte, const Shape &input_shape,
const T *input_data, const Shape &filter_shape, const Shape &output_shape,
T *im2col_data)
{
- (void)params;
- (void)zero_byte;
- (void)input_shape;
- (void)input_data;
- (void)filter_shape;
- (void)output_shape;
- (void)im2col_data;
- throw std::runtime_error{"NYI: cker DilatedIm2col"};
+ const int32_t zero_point = static_cast<int32_t>(zero_byte);
+ DilatedIm2col<T>(params, input_shape, input_data, filter_shape, output_shape, im2col_data,
+ &zero_point, 1);
}
template <typename T>
const T *input2_data, const Shape &output_shape, T *output_data,
const std::function<T(const T &, const T &)> &fn)
{
- const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ const int32_t flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i)
{
output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
float *output_data,
const std::function<float(const float &, const float &)> &fn)
{
- const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < size; i++)
{
output_data[i] =
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
set(TEST_COMPUTE test_compute)
file(GLOB_RECURSE TESTS "*.cc")
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.8.0'
+release = '1.9.0'
# -- General configuration ---------------------------------------------------
}
```
+5. Add code for shape inference
+- ONE runtime tries to calculate shapes and allocate memory during compilation time. For some calculations of output shapes that cannot be done during compilation time, ONE runtime will calculate shapes and allocate memory during execution time.
+- Calculation of shapes during compilation time is called _static shape inference_ and calculation of shapes during execution time is called _dynamic shape inference_.
+- [`StaticShapeInference.h`](/runtime/onert/compiler/StaticShapeInference.h)
+
+```CPP
+ void visit(const ir::operation::Select &op) override;
+```
+- [`StaticShapeInference.cc`](/runtime/onert/core/src/compiler/StaticShapeInference.cc)
+```CPP
+void StaticShapeInferer::visit(const ir::operation::Select &op)
+{
+ const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
+ const auto &input_cond = _operands.at(input_cond_idx);
+
+ const auto &input_true = ...
+ const auto &input_false = ...
+ ir::Operand &output = ...
+
+  // Select output shape
+ ir::Shape new_shape = shape_inference::inferSelectShape(
+ input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
+ output.info().shape(new_shape);
+}
+```
+- [`DynamicShapeInference.h`](/runtime/onert/core/include/exec/DynamicShapeInference.h)
+```CPP
+ void visit(const ir::operation::Select &op) override;
+```
+- [`DynamicShapeInference.cc`](/runtime/onert/core/src/exec/DynamicShapeInference.cc)
+```CPP
+void DynamicShapeInferer::visit(const ir::operation::Select &op)
+{
+ const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
+ const auto &input_cond = _tensor_registry->getITensor(input_cond_idx);
+
+ const auto &input_true = ...
+ const auto &input_false = ...
+ auto output = ...
+
+ if ((!input_cond->is_dynamic()) && (!input_true->is_dynamic()) && (!input_false->is_dynamic()))
+ {
+ return;
+ }
+
+ auto input_cond_shape = input_cond->getShape();
+ auto input_true_shape = input_true->getShape();
+ auto input_false_shape = input_false->getShape();
+
+  // Select output shape
+ ir::Shape new_shape =
+ shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
+
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+}
+```
+
## Frontend
This module generates IR from a model. There are two kinds of frontend: Loader and NNAPI. First, Loader loads a model file and generates IR from it. Second, NNAPI generates IR from a model set via [Neural Networks API of android](https://developer.android.com/ndk/guides/neuralnetworks)
--- /dev/null
+# Release Note 1.9.0
+
+## ONE Compiler
+
+### Compiler supports more operations
+
+- NonMaxSuppressionV4, NonMaxSuppressionV5, PadV2, Unique
+
+### Changes
+
+- Quantization enhancements: channel-wise UINT8 quantization (Conv2D, DepthwiseConv, TransposeConv, FullyConnected)
+- Experimental requantization from INT8 to UINT8
+- Adding more operator value tests
+- tf2tfliteV2 supports conversion from Keras model, saved model
+- Refactoring long class code using visitor patterns for better maintainability
+- Introducing optimization pass that fuses batch normalization with Transposed Convolution.
+
+
+## ONE Runtime
+
+### Runtime backend operation support
+
+- CPU backend: RANK
+- CPU backend qasymm uint8: LOG_SOFTMAX
+- ACL-CL backend: LEAKY_RELU, RESIZE_NEAREST_NEIGHBOR
+
+
+### Optimization
+
+- Copy Elimination between compatible backends
+
+### Operation Implementation
+
+- Operations with same parameters are unified
+
+### Change
+
+- CPU backend qasymm uint8 performance enhancement: arithmetic operations
# Compute
+
+`compute` directory is for the libraries for actual computation of neural network operations. These libraries are used by backends. Currently we have two libraries.
+
+## ARMComputeEx
+
+It is an extension of ARM [ComputeLibrary](https://github.com/ARM-software/ComputeLibrary), in order to support some operations that are not yet supported by ComputeLibrary. It is used by `acl_cl` and `acl_neon` backends.
+
+The code structure looks just like ComputeLibrary's. Some of the code could be copied from the latest version of ComputeLibrary to support some operations quickly when those are not included in the latest version yet.
+
+## cker
+
+"cker" stands for Cpu KERnel. It is a port of TensorFlow Lite's operation kernels, along with some code of our own. It is used by `cpu` backend.
-https://bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz
+https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz
# EXTERNAL_DOWNLOAD_SERVER will be overwritten by CI server to use mirror server.
envoption(EXTERNAL_DOWNLOAD_SERVER "http://sourceforge.net")
- set(BOOST_URL ${EXTERNAL_DOWNLOAD_SERVER}/projects/boost/files/boost/1.58.0/boost_1_58_0.tar.gz)
+ envoption(BOOST_URL ${EXTERNAL_DOWNLOAD_SERVER}/projects/boost/files/boost/1.58.0/boost_1_58_0.tar.gz)
ExternalSource_Download(BOOST ${BOOST_URL})
set(BoostSource_DIR ${BOOST_SOURCE_DIR} PARENT_SCOPE)
# NOTE TensorFlow 1.13.1 uses https://bitbucket.org/eigen/eigen/get/9f48e814419e.tar.gz
# but it has a issue https://eigen.tuxfamily.org/bz/show_bug.cgi?id=1643
# The following URL resolves above issue
- envoption(EXTERNAL_DOWNLOAD_SERVER "https://bitbucket.org")
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://mirror.bazel.build/bitbucket.org")
envoption(EIGEN_1_13_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/eigen/eigen/get/88fc23324517.tar.gz)
ExternalSource_Download(EIGEN
--- /dev/null
+function(_TensorFlowGEMMLowpSource_import)
+ if(NOT DOWNLOAD_GEMMLOWP)
+ set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_GEMMLOWP)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.3.0.
+ # See tensorflow/tensorflow/workspace.bzl.
+ envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+
+ ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.3.0-GEMMLOWP ${TENSORFLOW_2_3_0_GEMMLOWP_URL})
+
+ set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowGEMMLowpSource_import)
+
+_TensorFlowGEMMLowpSource_import()
--- /dev/null
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
--- /dev/null
+function(_TensorFlowRuySource_import)
+ if(NOT DOWNLOAD_RUY)
+ set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_RUY)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.3.0.
+ # See tensorflow/third_party/ruy/workspace.bzl
+ envoption(TENSORFLOW_2_3_0_RUY_URL https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip)
+
+ ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.3.0-RUY ${TENSORFLOW_2_3_0_RUY_URL})
+
+ set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowRuySource_import)
+
+_TensorFlowRuySource_import()
--- /dev/null
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
option(DOWNLOAD_EIGEN "Download Eigen source" ON)
option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
option(DOWNLOAD_GEMMLOWP "Download GEMM low precesion library source" ON)
+option(DOWNLOAD_RUY "Download ruy source" ON)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
option(DOWNLOAD_GFLAGS "Download GFlags source" OFF)
option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" ON)
+option(BUILD_WITH_HDF5 "Build test tool with HDF5 library" ON)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" ON)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
# Need boost library
-option(DOWNLOAD_BOOST "Download boost source" OFF)
-option(BUILD_BOOST "Build boost source" OFF)
+option(DOWNLOAD_BOOST "Download boost source" ON)
+option(BUILD_BOOST "Build boost source" ON)
option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" OFF)
option(BUILD_NNAPI_TEST "Build nnapi_test" OFF)
-option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" OFF)
+option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
-option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
-option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
RESULT_VARIABLE Boost_BUILD)
endif()
- set(BoostBuild_DIR ${BoostSource_DIR})
+ set(BoostBuild_DIR ${CMAKE_BINARY_DIR}/externals/boost)
set(BoostInstall_DIR ${Boost_PREFIX})
unset(Boost_Options)
endif()
endif()
-set(Boost_PREFIX ${CMAKE_INSTALL_PREFIX})
+set(Boost_PREFIX ${EXT_OVERLAY_DIR})
if(BUILD_BOOST)
_Boost_Build("${Boost_PREFIX}")
- # Let's use locally built boost to system-wide one so sub modules
- # needing Boost library and header files can search for them
- # in ${Boost_PREFIX} directory
- list(APPEND CMAKE_PREFIX_PATH "${Boost_PREFIX}")
-
# Without Boost_INCLUDE_DIR, it complains the variable is missing during find_package.
- set(Boost_INCLUDE_DIR ${CMAKE_INSTALL_PREFIX}/include)
+ set(Boost_INCLUDE_DIR ${Boost_PREFIX}/include)
# 1) without static build, it will complain it cannot find libc++_shared.so.
# 2) We uses static libraries for other libraries.
unset(HDF5_CXX_LIBRARY_hdf5 CACHE)
unset(HDF5_CXX_LIBRARY_hdf5_cpp CACHE)
+if(NOT BUILD_WITH_HDF5)
+ set(HDF5_FOUND FALSE)
+ return()
+endif(NOT BUILD_WITH_HDF5)
+
# Case 1. external hdf5
if(DEFINED EXT_HDF5_DIR)
find_path(HDF5_INCLUDE_DIRS NAMES H5Cpp.h NO_CMAKE_FIND_ROOT_PATH PATHS "${EXT_HDF5_DIR}/include")
check_copyright() {
DIRECTORIES_NOT_TO_BE_TESTED=$1
- CORRECT_COPYRIGHT="Copyright \(c\) [0-9]+ Samsung Electronics Co\., Ltd\. All Rights Reserved"
+ CORRECT_COPYRIGHT="Copyright \(c\) [0-9\-]+ Samsung Electronics Co\., Ltd\. All Rights Reserved"
FILES_TO_CHECK=$(git ls-files -c --exclude-standard)
FILES_TO_CHECK_COPYRIGHTS=()
for f in ${FILES_TO_CHECK[@]}; do
+ # Manually ignore checking
+ if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then
+ continue
+ fi
+
# File extension to check
if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then
FILES_TO_CHECK_COPYRIGHTS+=("${f}")
java -jar $PROJECT_DIR/tca-standalone-0.0.8.jar \
--outdir=$PROJECT_DIR/tcm-output \
- --config=$PROJECT_DIR/.ahub/tcchecker-tca/config.yaml \
+ --config=$PROJECT_DIR/src/.ahub/tcchecker-tca/config.yaml \
--local=$PROJECT_DIR/src \
--logfile=$PROJECT_DIR/tcm-output/tcm.log \
--debug
# create python virtual environment
./nncc docker-run python3 -m venv "${NNCC_INSTALL_PREFIX}/bin/venv"
+# TODO remove version number of 'pip==20.2.1 setuptools==49.3.0'
+# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
-m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install -U pip setuptools
+ install -U pip==20.2.1 setuptools==49.3.0
./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
-m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
install tensorflow-cpu==2.3.0
echo "Usage: ./tizen_xu4_test.sh --rpm-dir=path/to/rpm-dir"
echo "Usage: ./tizen_xu4_test.sh --test-suite-path=path/to/test-suite.tar.gz"
echo "Usage: ./tizen_xu4_test.sh --skip-install-model"
+ echo "Usage: ./tizen_xu4_test.sh --rpm-dir=path/to/rpm-dir --skip-test"
echo ""
echo "--rpm-dir <dir> : directory containing nnfw.rpm and nnfw-test.rpm"
echo "--test-suite-path <dir> : filepath to test-suite.tar.gz"
echo "--skip-install-model : skip install downloaded model"
+ echo "--skip-test : skip running test"
echo "--gcov-dir <dir> : directory to save gcov files"
}
# download api test model file for nnfw_api_gtest
MODEL_CACHE_DIR=$(mktemp -d)
tests/scripts/models/run_test.sh --download=on --run=off \
- --configdir=test/scripts/nnfw_api_gtest/models \
+ --configdir=tests/scripts/models/nnfw_api_gtest \
--cachedir=$MODEL_CACHE_DIR
tar -zcf $MODEL_CACHE_DIR/api_model_test.tar.gz -C $MODEL_CACHE_DIR .
$SDB_CMD push $MODEL_CACHE_DIR/api_model_test.tar.gz $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
}
INSTALL_MODEL="1"
+RUN_TEST="1"
# Parse command argv
for i in "$@"
do
--gcov-dir=*)
GCOV_DIR=${i#*=}
;;
+ --skip-test)
+ RUN_TEST="0"
+ ;;
esac
shift
done
echo "======= Skip install model ======="
fi
+if [ $RUN_TEST = "0" ]; then
+ echo "======= Skip test ======="
+ exit 0
+fi
+
if [ -z "${GCOV_DIR}" ]; then
${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader"
${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
Name: nnfw
Summary: nnfw
-Version: 1.8.0
+Version: 1.9.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause
%{_libdir}/pkgconfig/nnfw-plugin.pc
%endif
+%ifarch arm armv7l aarch64
%files minimal-app
%manifest %{name}.manifest
%defattr(-,root,root,-)
%{_bindir}/onert-minimal-app
+%endif
%if %{test_build} == 1
%files test
--- /dev/null
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "boxes"
+ type: FLOAT32
+ shape { dim: 10 dim: 4 }
+}
+operand {
+ name: "scores"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operand {
+ name: "max_output_size"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "iou_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "score_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "selected_indices"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "valid_outputs"
+ type: INT32
+ shape { }
+}
+operation {
+ type: "NonMaxSuppressionV4"
+ input: "boxes"
+ input: "scores"
+ input: "max_output_size"
+ input: "iou_threshold"
+ input: "score_threshold"
+ output: "selected_indices"
+ output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "max_output_size"
+input: "iou_threshold"
+input: "score_threshold"
+output: "selected_indices"
+output: "valid_outputs"
--- /dev/null
+operand {
+ name: "boxes"
+ type: FLOAT32
+ shape { dim: 10 dim: 4 }
+}
+operand {
+ name: "scores"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operand {
+ name: "max_output_size"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "5" }
+}
+operand {
+ name: "iou_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "score_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "selected_indices"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "valid_outputs"
+ type: INT32
+ shape { }
+}
+operation {
+ type: "NonMaxSuppressionV4"
+ input: "boxes"
+ input: "scores"
+ input: "max_output_size"
+ input: "iou_threshold"
+ input: "score_threshold"
+ output: "selected_indices"
+ output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "iou_threshold"
+input: "score_threshold"
+output: "selected_indices"
+output: "valid_outputs"
--- /dev/null
+operand {
+ name: "boxes"
+ type: FLOAT32
+ shape { dim: 10 dim: 4 }
+}
+operand {
+ name: "scores"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operand {
+ name: "max_output_size"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "iou_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "score_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "soft_nms_sigma"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "selected_indices"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "selected_scores"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "valid_outputs"
+ type: INT32
+ shape { }
+}
+operation {
+ type: "NonMaxSuppressionV5"
+ input: "boxes"
+ input: "scores"
+ input: "max_output_size"
+ input: "iou_threshold"
+ input: "score_threshold"
+ input: "soft_nms_sigma"
+ output: "selected_indices"
+ output: "selected_scores"
+ output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "max_output_size"
+input: "iou_threshold"
+input: "score_threshold"
+input: "soft_nms_sigma"
+output: "selected_indices"
+output: "selected_scores"
+output: "valid_outputs"
--- /dev/null
+operand {
+ name: "boxes"
+ type: FLOAT32
+ shape { dim: 10 dim: 4 }
+}
+operand {
+ name: "scores"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operand {
+ name: "max_output_size"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "5" }
+}
+operand {
+ name: "iou_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "score_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "soft_nms_sigma"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "selected_indices"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "selected_scores"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "valid_outputs"
+ type: INT32
+ shape { }
+}
+operation {
+ type: "NonMaxSuppressionV5"
+ input: "boxes"
+ input: "scores"
+ input: "max_output_size"
+ input: "iou_threshold"
+ input: "score_threshold"
+ input: "soft_nms_sigma"
+ output: "selected_indices"
+ output: "selected_scores"
+ output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "iou_threshold"
+input: "score_threshold"
+input: "soft_nms_sigma"
+output: "selected_indices"
+output: "selected_scores"
+output: "valid_outputs"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "constant_values"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+ type: "PadV2"
+ input: "ifm"
+ input: "padding"
+ input: "constant_values"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 2 scale: 0.0078125 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 2 scale: 0.0078125 zero_point: 0 }
+}
+operation {
+ type: "Tanh"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
operand {
name: "ofm"
type: FLOAT32
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
operand {
name: "ofm"
type: FLOAT32
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
operand {
name: "ofm"
type: INT32
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
operand {
name: "ofm"
type: INT32
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
operand {
name: "ofm"
type: UINT8
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
operand {
name: "ofm"
type: UINT8
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
--- /dev/null
+import tensorflow as tf
+import numpy as np
+
+input_ = tf.compat.v1.placeholder(shape=[1, 1, 1, 1], dtype=tf.float32)
+paddings_ = tf.compat.v1.constant(
+ np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.int32))
+constant_values_ = tf.compat.v1.constant(1, shape=(), dtype=tf.float32)
+op_ = tf.compat.v1.pad(input_, paddings=paddings_, constant_values=constant_values_)
--- /dev/null
+# NOTE please use TF2.4.0-dev or above to use gelu op
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.nn.gelu(in_, approximate=False, name="Output")
--- /dev/null
+# NOTE please use TF2.4.0-dev or above to use gelu op
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.nn.gelu(in_, approximate=True, name="Output")
--- /dev/null
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(4)
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8), name="Hole")
+non_max_suppression_padded_ = tf.compat.v1.image.non_max_suppression_padded(
+ in_boxes_, in_scores_, max_output_size)
--- /dev/null
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(6)
+iou_threshold = tf.compat.v1.constant(0.5)
+score_threshold = tf.compat.v1.constant(0.6)
+pad_to_max_output_size = True
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12), name="Hole")
+
+non_max_suppression_padded_ = tf.compat.v1.image.non_max_suppression_padded(
+ in_boxes_, in_scores_, max_output_size, iou_threshold, score_threshold,
+ pad_to_max_output_size)
--- /dev/null
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(4)
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8), name="Hole")
+
+# non_max_suppression_with_scores requires TF 1.15+
+non_max_suppression_with_scores_ = tf.compat.v1.image.non_max_suppression_with_scores(
+ in_boxes_, in_scores_, max_output_size)
--- /dev/null
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(6)
+iou_threshold = tf.compat.v1.constant(0.5)
+score_threshold = tf.compat.v1.constant(0.6)
+soft_nms_sigma = tf.compat.v1.constant(0.5)
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12), name="Hole")
+
+# non_max_suppression_with_scores requires TF 1.15+
+non_max_suppression_with_scores_ = tf.compat.v1.image.non_max_suppression_with_scores(
+ in_boxes_, in_scores_, max_output_size, iou_threshold, score_threshold,
+ soft_nms_sigma)
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.8.0"
+ versionName "1.9.0"
externalNativeBuild {
ndkBuild {
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class com_samsung_onert_NativeSessionWrapper */
}
const PhaseOption &option() const { return _option; }
- const MemoryPoller &mem_poll() const { return _mem_poll; }
+ const MemoryPoller &mem_poll() const { return *_mem_poll; }
const Phase &at(const std::string &tag) const { return _phases.at(tag); }
private:
private:
const PhaseOption _option;
std::unordered_map<std::string, Phase> _phases;
- MemoryPoller _mem_poll;
+ std::unique_ptr<MemoryPoller> _mem_poll;
};
} // namespace benchmark
stop = (_phases.size() == 0);
}
- if (_rss_map[phase] == 0)
+ mem = getVmRSS();
+ if (_gpu_poll)
{
- uint32_t mem = getVmRSS();
- if (_gpu_poll)
- {
- mem += getGpuMemory();
- }
- _rss_map[phase] = mem;
+ mem += getGpuMemory();
}
+ if (mem > _rss_map[phase])
+ _rss_map[phase] = mem;
- if (_hwm_map[phase] == 0)
+ mem = getVmHWM();
+ if (_gpu_poll)
{
- uint32_t mem = getVmHWM();
- if (_gpu_poll)
- {
- mem += getGpuMemory();
- }
- _hwm_map[phase] = mem;
+ mem += getGpuMemory();
}
+ _hwm_map[phase] = mem;
- if (_pss_map[phase] == 0)
- {
- uint32_t mem = getPssSum();
+ mem = getPssSum();
+ if (mem > _pss_map[phase])
_pss_map[phase] = mem;
- }
if (stop)
{
namespace benchmark
{
-Phases::Phases(const PhaseOption &option)
- : _option(option),
- _mem_poll(std::chrono::milliseconds(option.memory_interval), option.memory_gpu)
+Phases::Phases(const PhaseOption &option) : _option(option)
{
- // DO NOTHING
+ if (_option.memory)
+ {
+ _mem_poll = std::make_unique<MemoryPoller>(std::chrono::milliseconds(option.memory_interval),
+ option.memory_gpu);
+ }
}
void Phases::run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc *post,
for (uint32_t i = 0; i < loop_num; ++i)
{
if (!option_disable && _option.memory)
- _mem_poll.start(p);
+ _mem_poll->start(p);
uint64_t t = 0u;
t = nowMicros();
t = nowMicros() - t;
if (!option_disable && _option.memory)
- _mem_poll.end(p);
+ _mem_poll->end(p);
phase.time.emplace_back(t);
if (!option_disable && _option.memory)
{
- phase.memory[MemoryType::RSS].emplace_back(_mem_poll.getRssMap().at(p));
- phase.memory[MemoryType::HWM].emplace_back(_mem_poll.getHwmMap().at(p));
- phase.memory[MemoryType::PSS].emplace_back(_mem_poll.getPssMap().at(p));
+ phase.memory[MemoryType::RSS].emplace_back(_mem_poll->getRssMap().at(p));
+ phase.memory[MemoryType::HWM].emplace_back(_mem_poll->getHwmMap().at(p));
+ phase.memory[MemoryType::PSS].emplace_back(_mem_poll->getPssMap().at(p));
}
if (post)
NNFW_STATUS_INVALID_STATE = 3,
/** When it is out of memory */
NNFW_STATUS_OUT_OF_MEMORY = 4,
+ /** When it was given an insufficient output buffer */
+ NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE = 5,
} NNFW_STATUS;
/**
NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id,
custom_kernel_registration_info *info);
+/**
+ * @brief Get the input tensor index by name
+ *
+ * This function finds an input tensor of the given name.
+ * If found, the index value is set to the address that @c index points to, and returns
+ * @c NNFW_STATUS_NO_ERROR. Otherwise, @c index is unchanged and returns @c NNFW_STATUS_ERROR .
+ *
+ * @note If two or more input tensors are of the same name, the one with the lowest index is always
+ * returned.
+ *
+ * @param[in] session the session object
+ * @param[in] tensorname the name of the tensor to find, a null terminated char pointer string
+ * @param[out] index the index to be returned
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index);
+
+/**
+ * @brief Get the output tensor index by name
+ *
+ * This function finds an output tensor of the given name.
+ * If found, the index value is set to the address that @c index points to, and returns
+ * @c NNFW_STATUS_NO_ERROR. Otherwise, @c index is unchanged and returns @c NNFW_STATUS_ERROR .
+ *
+ * @note If two or more output tensors are of the same name, the one with the lowest index is always
+ * returned.
+ *
+ * @param[in] session the session object
+ * @param[in] tensorname the name of the tensor to find, a null terminated char pointer string
+ * @param[out] index the index to be returned
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index);
+
#endif // __NNFW_EXPERIMENTAL_H__
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01000800
+#define NNFW_VERSION 0x01000900
#endif // __NNFW_VERSION_H__
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_UNEXPECTED_NULL, 2);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INVALID_STATE, 3);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_OUT_OF_MEMORY, 4);
+STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE, 5);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_NONE, 0);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_CHANNELS_LAST, 1);
NNFW_RETURN_ERROR_IF_NULL(session);
return session->load_circle_from_buffer(buffer, size);
}
+
+NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->input_tensorindex(tensorname, index);
+}
+
+NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->output_tensorindex(tensorname, index);
+}
#include "CustomKernelRegistry.h"
#include "compiler/Compiler.h"
#include "util/ConfigSource.h"
+#include "util/Exceptions.h"
#include "exec/Execution.h"
#include "circle_loader.h"
#include "tflite_loader.h"
#define MAX_BACKEND_NAME_LENGTH 32
#define MAX_OP_NAME_LENGTH 64
#define MAX_PATH_LENGTH 1024
+#define MAX_TENSOR_NAME_LENGTH 64
// Is null-terminating in length ?
static bool null_terminating(const char *str, uint32_t length)
return onert::ir::Layout::UNKNOWN;
}
+NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensorname,
+ uint32_t *index, bool is_input)
+{
+ if (!tensorname || !index)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+
+ if (!null_terminating(tensorname, MAX_TENSOR_NAME_LENGTH))
+ {
+ std::cerr << "tensor name is too long (max: " << MAX_TENSOR_NAME_LENGTH << ")" << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ auto ind_found = is_input ? graph.getInputIndex(tensorname) : graph.getOutputIndex(tensorname);
+
+ if (ind_found.undefined())
+ {
+ // Not found
+ return NNFW_STATUS_ERROR;
+ }
+ else
+ {
+ *index = ind_found.value();
+ return NNFW_STATUS_NO_ERROR;
+ }
+}
+
nnfw_session::nnfw_session()
: _subgraphs{nullptr}, _execution{nullptr},
_kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}
{
_execution->execute();
}
+ catch (const onert::InsufficientBufferSizeException &e)
+ {
+ // Currently insufficient buffer always means output buffer.
+ std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
+ return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE;
+ }
catch (const std::exception &e)
{
std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
}
}
+ auto ind = primary_subgraph()->getInputs().at(index);
+ auto &input = primary_subgraph()->operands().at(ind);
+
+ onert::ir::Shape new_shape(ti.rank);
+ for (int32_t i = 0; i < ti.rank; i++)
+ new_shape.dim(i) = ti.dims[i];
+
+ // if passed shape is same with the shape of model, do nothing
+ if (input.info().shape() == new_shape)
+ return NNFW_STATUS_NO_ERROR;
+
if (!isStatePreparedOrFinishedRun())
{
// In this case, if we apply input shape in primary_subgraph, it will propagate after
// compilation and excution
- auto ind = primary_subgraph()->getInputs().at(index);
- auto &input = primary_subgraph()->operands().at(ind);
-
- onert::ir::Shape new_shape(ti.rank);
- for (int32_t i = 0; i < ti.rank; i++)
- new_shape.dim(i) = ti.dims[i];
// overwrite input shape with the shape from ti
input.info().shape(new_shape);
}
else // when called after nnfw_session::prepare()
{
- onert::ir::Shape new_shape(ti.rank);
- for (int32_t i = 0; i < ti.rank; i++)
- new_shape.dim(i) = ti.dims[i];
-
_execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
}
{
return isStatePrepared() || isStateFinishedRun();
}
+
+NNFW_STATUS nnfw_session::input_tensorindex(const char *tensorname, uint32_t *index)
+{
+ return getTensorIndexImpl(*primary_subgraph(), tensorname, index, true);
+}
+
+NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *index)
+{
+ return getTensorIndexImpl(*primary_subgraph(), tensorname, index, false);
+}
NNFW_STATUS input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
NNFW_STATUS output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
- NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
-
NNFW_STATUS set_available_backends(const char *backends);
NNFW_STATUS set_op_backend(const char *op, const char *backend);
NNFW_STATUS set_config(const char *key, const char *value);
NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
-
NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+ //
+ // Experimental API
+ //
+
+ NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
+ NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index);
+ NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
+
private:
onert::ir::Graph *primary_subgraph();
bool isStateInitialized();
#include "KernelGenerator.h"
#include "TensorManager.h"
#include "Optimizer.h"
+#include "AclTensorRegistry.h"
namespace onert
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+ auto tm = createTensorManager(is_linear_executor);
+ auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
- copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
- copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node)
{
copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS);
}
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
- copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::Gather &node)
{
copyInputInitialize(node, ir::operation::Gather::INDICES);
copyInputInitialize(node, ir::operation::HashtableLookup::KEYS);
}
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
}
}
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-}
-
} // namespace acl_cl
} // namespace backend
} // namespace onert
#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
namespace onert
{
namespace acl_cl
{
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- void copyInputInitialize(const ir::Operation &node, uint32_t index);
- void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ using acl_common::AclConstantInitializer::visit;
+ void visit(const ir::operation::EmbeddingLookup &) final;
+ void visit(const ir::operation::Gather &) final;
+ void visit(const ir::operation::HashtableLookup &) final;
+ void visit(const ir::operation::SpaceToBatchND &) final;
};
} // namespace acl_cl
namespace acl_cl
{
-using ::onert::backend::acl_common::asAclClFunction;
+using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>;
+ ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
- const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
assert(_ctx.at(block_size_index).data());
- auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ const auto activation = node.param().activation;
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->data_type() == ofm_tensor->data_type())
- {
- auto l = std::make_unique<::arm_compute::CLCopy>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+ const auto act_info = acl_common::asActivationLayerInfo(activation);
- fn = std::move(l);
- }
- else
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().arithmetic_type)
{
- auto l = std::make_unique<::arm_compute::CLCast>();
-
- // TODO Support converting float to int32 as round down
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
- fn = std::move(l);
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+ act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+ break;
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
- ::arm_compute::Size2D(1U, 1U), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
- auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
}
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
return;
}
- auto output_tensor = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
std::vector<::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+ input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(input_tensors.at(0), output_tensor->handle());
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
+ output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_tensor->handle(), fixed_axis);
- fn = std::move(l);
+ fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
+ input_tensors, output_tensor->handle(), fixed_axis);
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->at(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
const auto activation = node.param().activation;
- auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Reduce &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto keep_dims{node.param().keep_dims};
const auto reduce_type = node.param().reduce_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
std::unique_ptr<arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- auto l = std::make_unique<::arm_compute::CLReduceMean>();
-
const auto acl_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
+ keep_dims, output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
- acl_common::convertReduceType(reduce_type));
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
UNUSED_RELEASE(frontend_layout);
UNUSED_RELEASE(backend_layout);
- auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
- fn->configure(input_tensor->handle(), output_tensor->handle());
- auto acl_fn = asAclClFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
ends_set.set(i, ends[i]);
}
- auto fn = std::make_unique<::arm_compute::CLSlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
strides_set.set(i, strides[i]);
}
- auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto rank = _ctx.at(ifm_idx).shape().rank();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
rank, pv, frontend_layout, backend_layout);
- auto fn = std::make_unique<::arm_compute::CLPermute>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Sub &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
+ const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
+ const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+ node.param().op_type, node.param().alpha, node.param().beta);
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Div &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+ arm_compute::BinaryLogicalOperation::AND);
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ default:
+ {
+      std::string err_msg("acl_cl KernelGenerator : " + node.name() +
+                          " is not elementwise-binary operations");
+ assert(false && err_msg.c_str());
+ break;
+ }
+ }
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLExpLayer>();
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ {
+ if (input_tensor->data_type() == output_tensor->data_type())
+ {
+        fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
+                                                            output_tensor->handle());
+      }
+ else
+ {
+ // TODO Support converting float to int32 as round down
+ fn = acl_common::generateLayer<arm_compute::CLCast>(
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ }
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::FLOOR:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
- auto acl_fn = asAclClFunction(std::move(fn));
+ fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ {
+      throw std::runtime_error("acl_cl KernelGenerator : " + node.name() +
+                               " is not supported yet");
+ break;
+ }
+ }
+
+ auto acl_fn = asAclFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto gamma_tensor = _tensor_builder->at(gamma_index).get();
- auto beta_tensor = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
- auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
- beta_tensor->handle(), epsilon);
+ auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- ::arm_compute::BinaryLogicalOperation::AND);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
- ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLComparison>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
std::vector<arm_compute::ICLTensor *> inputs;
for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+ inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLStackLayer>();
-
// Disable applied dim_correction
std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
}
}
- fn->configure(inputs, axis, output);
+ auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
// Revert disabling applied dim_correction
assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
}
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_reg, _current_op_seq_layout,
+ acl_common::convertPoolType(node.param().op_type));
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ const auto activation = node.param().activation;
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
// WHCN -> CWHN
pv = arm_compute::PermutationVector{2, 0, 1};
- auto l = std::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
{
// CWHN -> WHCN
pv = arm_compute::PermutationVector{1, 2, 0};
- auto l = std::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else
{
- auto l = std::make_unique<::arm_compute::CLCopy>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLScale>();
+ auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT);
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+ const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto weights_tensor = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
- auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
- copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
- _return_fn = asAclClFunction(std::move(copy_layer));
+ auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ _return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_tensor->handle(), weights_tensor->handle(),
- recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLFloor>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
- auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
- l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
- fn = std::move(l);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
+ auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
-
- fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hits_tensor = _tensor_builder->at(hits_index).get();
-
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto keys_tensor = _tensor_builder->at(keys_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
- fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto alpha_tensor = _tensor_builder->at(alpha_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLPReluLayer>();
-
- fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
- auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
- tconv_info, invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
+ auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
+ invalid_vertical);
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
+ auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TopKV2 &node)
const auto k = node.param().k;
- auto values_tensor = _tensor_builder->at(outputValues_index).get();
- auto indices_tensor = _tensor_builder->at(outputIndices_index).get();
- auto input_tensor = _tensor_builder->at(inputData_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get();
- auto fn = std::make_unique<::arm_compute::CLTopKV2>();
+ auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
+ input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
- fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Gather &node)
const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto indices_tensor = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
// NOTE The frontend layout and backend layout must be the same for this operation.
// If not the same, we have to add a stage(?) to perform permutation of output tensor. It
assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
- auto fn = std::make_unique<::arm_compute::CLGatherEx>();
-
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
assert(n == ifm_tensor->num_dimensions());
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLNeg>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ArgMax &node)
assert((ifm_shape.rank() - 1) == ofm_shape.rank());
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
+ ::arm_compute::ReductionOperation::ARG_IDX_MAX);
- fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
- ::arm_compute::ReductionOperation::ARG_IDX_MAX);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
+ input_tensor->handle(), output_tensor->handle(), block_size);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
std::vector<arm_compute::ICLTensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLSplit>();
-
- fn->configure(ifm_tensor->handle(), output_tensors, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_builder->at(input_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
std::vector<arm_compute::ICLTensor *> outputs;
for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+ outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_builder->at(output_index);
+ const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
assert(output_rank == output_tensor->num_dimensions());
if (output_rank != output_tensor->info()->num_dimensions())
}
}
- auto fn = std::make_unique<::arm_compute::CLUnstack>();
-
- fn->configure(input, outputs, axis);
+ auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pad &node)
auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
- auto fn = std::make_unique<::arm_compute::CLPadLayer>();
// Disable applied dim_correction
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
{
_ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
}
- fn->configure(input, output, padding_list, pixel_value);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
// Do not revert disabling applied dim_correction CLPadKernel has cl kernel for 4-dimension
// It would produce a mismatch of results
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
- 0);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
- 0);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
} // namespace acl_cl
#include "ir/Operands.h"
#include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
namespace onert
{
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Mul &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Cast &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::LogicalAnd &) override;
void visit(const ir::operation::LSTM &) override;
void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::RSQRT &) override;
- void visit(const ir::operation::ReLU &) override;
void visit(const ir::operation::ResizeBilinear &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
+ void visit(const ir::operation::ResizeNearestNeighbor &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Floor &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::L2Pool2D &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::SQRT &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::LogicalNot &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::TopKV2 &) override;
void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Neg &) override;
- void visit(const ir::operation::Abs &) override;
void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Dequantize &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::Unpack &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
void visit(const ir::operation::ConvertFp32ToFp16 &) override;
void visit(const ir::operation::ConvertFp16ToFp32 &) override;
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
ir::Layout _current_op_seq_layout;
};
#include "ParentInfo.h"
#include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
#include <util/logging.h>
#include "AclSubTensorAnalyzer.h"
using TensorManager =
acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
if (is_linear_executor)
{
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AclConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// Constant initializer logic shared by the ACL CL and NEON backends.
+// The constructor only stores the operand set and the tensor registry;
+// the visit() overloads below register per-operand initializer functions.
+AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+{
+ // DO NOTHING
+}
+
+// Register a plain copy initializer for the node's index-th input operand.
+void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ const auto &input_obj = _operands.at(input_index);
+ registerCopyInitializer(input_index, input_obj);
+}
+
+// Register a permute initializer for the node's index-th input operand
+// (used for kernel weights; see registerPermuteInitializer).
+void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ const auto &input_obj = _operands.at(input_index);
+ registerPermuteInitializer(input_index, input_obj);
+}
+
+// BLOCK_SIZE: if constant, install a custom initializer that copies the
+// rank-1 int32 block-size operand with its element order reversed
+// (element i receives base[num_elements - i - 1]).
+void AclConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
+ const auto &block_size_obj = _operands.at(block_size_index);
+
+ if (block_size_obj.isConstant())
+ {
+ _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+ assert(model_obj.data());
+ const auto &shape = model_obj.shape();
+ const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+ assert(model_obj.shape().rank() == 1);
+ obj.access([&](ITensor &tensor) {
+ for (size_t i = 0; i < shape.num_elements(); ++i)
+ {
+ // Reverse element order while copying
+ const int32_t value = base[shape.num_elements() - i - 1];
+ int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
+ tensor.calcOffset({static_cast<int32_t>(i)}));
+ *into = value;
+ }
+ });
+ };
+ }
+}
+
+// Conv2D: kernel needs layout permutation; bias is copied as-is.
+void AclConstantInitializer::visit(const ir::operation::Conv2D &node)
+{
+ permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
+ copyInputInitialize(node, ir::operation::Conv2D::BIAS);
+}
+
+// DepthwiseConv2D: kernel needs layout permutation; bias is copied as-is.
+void AclConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
+ copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
+}
+
+// FullyConnected: both weight and bias are copied without permutation.
+void AclConstantInitializer::visit(const ir::operation::FullyConnected &node)
+{
+ copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
+ copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
+}
+
+// LSTM: every constant weight/bias input is copied as-is.
+void AclConstantInitializer::visit(const ir::operation::LSTM &node)
+{
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
+}
+
+// RNN: weights and bias are copied as-is.
+void AclConstantInitializer::visit(const ir::operation::RNN &node)
+{
+ copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
+ copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::RNN::BIAS);
+}
+
+// TransposeConv: only the kernel is constant-initialized (with permutation).
+void AclConstantInitializer::visit(const ir::operation::TransposeConv &node)
+{
+ permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+
+#include <backend/IConstantInitializer.h>
+#include <ir/Operands.h>
+#include "AclTensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Constant initializer common to the ACL CL and NEON backends
+ *
+ * Backend-specific initializers derive from this class and add or override
+ * visit() overloads for operations that need backend-specific handling.
+ */
+class AclConstantInitializer : public IConstantInitializer
+{
+public:
+ AclConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::TransposeConv &) override;
+
+protected:
+ // Register a copy initializer for the node's index-th input operand.
+ void copyInputInitialize(const ir::Operation &node, uint32_t index);
+ // Register a permute initializer for the node's index-th input operand.
+ void permuteInputInitialize(const ir::Operation &node, uint32_t index);
+
+private:
+ std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; }
+
+protected:
+ // Registry used to look up backend tensors while initializing constants.
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
std::unique_ptr<::arm_compute::IFunction> _func;
};
-class AclClFunction : public AclFunction
-{
-public:
- using AclFunction::AclFunction;
-};
-
} // namespace acl_common
} // namespace backend
} // namespace onert
namespace acl_common
{
+// Create an ACL layer of type Layer, configure it with the given arguments,
+// and return it as a generic arm_compute::IFunction.
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
+{
+ auto l = std::make_unique<Layer>();
+
+ l->configure(std::forward<Args>(args)...);
+
+ return l;
+}
+
+// Overload for layers whose constructor takes an IMemoryManager; the
+// remaining arguments are forwarded to configure().
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction>
+generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args)
+{
+ auto l = std::make_unique<Layer>(memory_manager);
+
+ l->configure(std::forward<Args>(args)...);
+
+ return l;
+}
+
template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
- typename T_TensorBuilder>
-std::unique_ptr<exec::IFunction>
-kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder)
+ typename T_TensorRegistry>
+std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
+ const ir::Operands &operands,
+ const std::shared_ptr<T_TensorRegistry> &tensor_reg)
{
// TODO Support dynamic rnn
// TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
const auto projection_clip = projection_threshold;
assert(cell_clip >= 0.f && projection_clip >= 0.f);
- auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get();
- auto output_tensor = tensor_builder->at(output_index).get();
+ auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get();
+ auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get();
+ auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = tensor_builder->at(input_index).get();
+ auto input_tensor = tensor_reg->getAclTensor(input_index).get();
- auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get();
+ auto input_to_forget_weights_tensor =
+ tensor_reg->getAclTensor(input_to_forget_weights_index).get();
+ auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get();
+ auto input_to_output_weights_tensor =
+ tensor_reg->getAclTensor(input_to_output_weights_index).get();
auto recurrent_to_forget_weights_tensor =
- tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get();
+ auto recurrent_to_cell_weights_tensor =
+ tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get();
auto recurrent_to_output_weights_tensor =
- tensor_builder->at(recurrent_to_output_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_output_weights_index).get();
- auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get();
+ auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get();
+ auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get();
+ auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get();
+ auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get();
+ auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get();
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<T_ACLLayer>();
+ auto act_info = asActivationLayerInfo(activation);
::arm_compute::LSTMParams<T_Tensor> lstm_params{};
if (has_cifg_param)
{
auto input_to_input_weights_tensor =
- tensor_builder->at(input_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional
auto recurrent_to_input_weights_tensor =
- tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); // optional
auto cell_to_input_weights_handle =
- has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle()
+ has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle()
: nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional
+ auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional
lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
recurrent_to_input_weights_tensor->handle(),
cell_to_input_weights_handle, input_gate_bias_tensor->handle());
if (has_peephole_param)
{
auto cell_to_forget_weights_tensor =
- tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional
auto cell_to_output_weights_tensor =
- tensor_builder->at(cell_to_output_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional
lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
cell_to_output_weights_tensor->handle());
}
if (has_projection_param)
{
- auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
+ auto projection_weights_tensor =
+ tensor_reg->getAclTensor(projection_weights_index).get(); // optional
+ auto projection_bias_handle =
+ has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle()
+ : nullptr; // optional
lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
}
- fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(),
- input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
- recurrent_to_forget_weights_tensor->handle(),
- recurrent_to_cell_weights_tensor->handle(),
- recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
- cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
- output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
- scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
- cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info,
- cell_clip, projection_clip);
+ auto fn = generateLayer<T_ACLLayer>(
+ input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
+ output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
+ scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
+ cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip,
+ projection_clip);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
- typename T_TensorBuilder>
+ typename T_TensorBuilder, typename T_TensorRegistry>
std::unique_ptr<exec::IFunction>
kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder,
+ const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout)
{
using ir::operation::FullyConnected;
reshape.dim(1) = input_size; /* W */
}
- auto output_tensor = tensor_builder->at(output_index).get();
- const auto input_tensor = tensor_builder->at(input_index).get();
- const auto weight_tensor = tensor_builder->at(weight_index).get();
- const auto bias_tensor = tensor_builder->at(bias_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index).get();
+ const auto input_tensor = tensor_reg->getAclTensor(input_index).get();
+ const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get();
+ const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get();
const auto frontend_layout = layout;
const auto acl_layout = output_tensor->handle()->info()->data_layout();
- auto fn =
- std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
if (operands.at(weight_index).isConstant())
{
assert(operands.at(weight_index).data());
}
- fn->configure(
- input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
- output_tensor->handle(), needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
+ auto fn = generateLayer<T_ACLLayer>(
+ tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape,
+ asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
-template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder>
+template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry>
std::unique_ptr<::arm_compute::IFunction>
kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
+ const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout,
::arm_compute::PoolingType pooling_type)
{
const auto ofm_index{node.getOutputs().at(0)};
VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
- auto ofm_tensor = tensor_builder->at(ofm_index).get();
- auto ifm_tensor = tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get();
::arm_compute::PoolingLayerInfo info{
pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<T_ACLLayer>();
+ asPadStrideInfo(padding, stride), true /* exclude_padding */};
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
+ auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
return fn;
}
#include "ir/OperandIndexMap.h"
#include <ir/Operands.h>
#include "AclTensorManager.h"
+#include "AclTensorRegistry.h"
#include <memory>
#include "ParentInfo.h"
#include <util/Utils.h>
public:
using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
- AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
+ AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+ const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
/**
* @brief Register tensor information to allocate on ACL-CL backend
void notifyLastUse(const ir::OperandIndex &) override;
bool isRegistered(const ir::OperandIndex &) const override;
- std::shared_ptr<backend::ITensorRegistry> tensorRegistry() override { return nullptr; }
void prepare(void) override;
void allocate() override;
void postFunctionPrepare() override;
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
- std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
-
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
*/
bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
- bool supportDynamicTensor() override { return false; }
-
private:
void buildTensors(void);
ir::OperandIndex findRootParent(ir::OperandIndex index);
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
+ std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
// for linear executor
std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
- T_AclTensorManager *tensor_mgr)
- : _operands{operands}, _tensor_mgr{tensor_mgr}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
+ const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+ const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
{
assert(_tensor_mgr);
}
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn)
-{
- _tensor_mgr->iterate(fn);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<T_ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
-{
- auto ret = _tensor_mgr->at(ind);
- assert(ret != nullptr);
- return ret;
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::unique_ptr<ITensorManager>
AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void)
{
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Tensor registry class for acl backends
+ *
+ * This is implemented as a wrapper of AclTensorManager.
+ */
+template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry
+{
+public:
+ AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
+
+ // Look up the tensor for @c ind via the wrapped tensor manager.
+ std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ {
+ return _tensor_mgr->at(ind);
+ }
+
+ // Same lookup as getITensor: this registry wraps a single tensor manager,
+ // so every tensor it knows is a native one.
+ std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ return getITensor(ind);
+ }
+
+ // Backend-typed accessor; return type is deduced from the manager's at().
+ auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }
+
+private:
+ // Raw non-owning pointer; the manager is owned elsewhere (the tensor
+ // builder holds it as a unique_ptr), so it must outlive this registry.
+ T_AclTensorManager *_tensor_mgr;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
#include "Swizzle.h"
#include "ir/DataType.h"
+#include "ir/operation/ElementwiseActivation.h"
#include <memory>
namespace
}
}
+// Convert an IR ElementwiseActivation (type plus alpha/beta parameters) to
+// the corresponding ACL ActivationLayerInfo.
+// Throws std::runtime_error for activation types not supported yet.
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+ float beta)
+{
+ switch (op_type)
+ {
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ if (beta == 0.f)
+ {
+ if (alpha == ir::operation::ElementwiseActivation::infinity)
+ {
+ // No upper bound: plain ReLU
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ }
+ else
+ {
+ // Upper bound only (e.g. ReLU6): BOUNDED_RELU with limit alpha
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
+ }
+ }
+ else
+ {
+ // Both bounds: LU_BOUNDED_RELU clamps to [beta, alpha] per ACL semantics
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
+ }
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ // alpha/beta act as the a/b scaling parameters of ACL's a*tanh(b*x)
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
+ // TODO In ACL and nnapi spec, currently, Logistic's L always is 1, k always is 1, x0 always
+ // 0(always sigmoid) regardless of values of the parameter.
+ // If ACL support non-sigmoid logistic, should fix param values.
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ // alpha is the negative-slope coefficient
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+ default:
+ throw std::runtime_error{"Not supported, yet"};
+ break;
+ }
+}
+
arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
ir::Layout frontend_layout, ir::Layout backend_layout)
{
return std::make_unique<AclFunction>(std::move(layer));
}
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
-{
- return std::make_unique<AclClFunction>(std::move(layer));
-}
-
ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout)
{
switch (data_layout)
}
}
+// Map an IR Pool2D pool type to the corresponding ACL PoolingType.
+// Throws std::runtime_error for unsupported pool types.
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir)
+{
+ switch (pool_type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return arm_compute::PoolingType::AVG;
+ case ir::operation::Pool2D::PoolType::L2:
+ return arm_compute::PoolingType::L2;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return arm_compute::PoolingType::MAX;
+ default:
+ throw std::runtime_error("convertPoolType: Not supported operation yet");
+ }
+}
+
arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
switch (reduce_type_ir)
#include "ir/Layout.h"
#include "ir/InternalType.h"
#include "ir/Operand.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Reduce.h"
+#include "ir/operation/ElementwiseActivation.h"
#include "ir/Shape.h"
#include "ir/TypeInfo.h"
#include "ir/Coordinates.h"
const ir::Stride &stride);
::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+ float beta);
arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
ir::Layout frontend_layout, ir::Layout backend_layout);
ir::Layout backend_layout);
std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
template <typename T_Function>
std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn)
ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
ir::DataType asRuntimeDataType(::arm_compute::DataType data_type);
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir);
arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir);
} // namespace acl_common
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+ auto tm = createTensorManager(is_linear_executor);
+ auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
- copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
- copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
- copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
}
}
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
-}
-
} // namespace acl_neon
} // namespace backend
} // namespace onert
#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
namespace onert
{
namespace acl_neon
{
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- void copyInputInitialize(const ir::Operation &node, uint32_t index);
- void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ using acl_common::AclConstantInitializer::visit;
+ void visit(const ir::operation::SpaceToBatchND &node) final;
};
} // namespace acl_neon
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
- const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
}
}
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();
-
- fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
- arm_compute::ReductionOperation::ARG_IDX_MAX);
+ auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
+ arm_compute::ReductionOperation::ARG_IDX_MAX);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
assert(_ctx.at(block_size_index).data());
- auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();
-
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->data_type() == ofm_tensor->data_type())
- {
- auto l = std::make_unique<::arm_compute::NECopy>();
+ const auto activation = node.param().activation;
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn = std::move(l);
- }
- else
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().arithmetic_type)
{
- auto l = std::make_unique<::arm_compute::NECast>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
- fn = std::move(l);
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ // For scale 1.0, the only allowed RoundingPolicy is RoundingPolicy::TO_ZERO
+ fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ break;
+ }
+ default:
+ assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+ break;
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
- ::arm_compute::Size2D(1U, 1U), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
+ input_tensor->handle(), output_tensor->handle(), block_size);
- fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
- auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info);
_return_fn = asAclFunction(std::move(fn));
}
}
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
return;
}
- auto output_tensor = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
std::vector<::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+ input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(input_tensors.at(0), output_tensor->handle());
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
+ output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_tensor->handle(), fixed_axis);
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
+ input_tensors, output_tensor->handle(), fixed_axis);
}
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+ node.param().op_type, node.param().alpha, node.param().beta);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
+ std::unique_ptr<arm_compute::IFunction> fn;
+ if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC)
+ {
+ // NOTE NEActivationLayer can produce erroneous results. It was caused by
+ // 'vexpq_f32()'.
+ // The neon function returns a value outside of the limit of representation in float as 'NaN'
+ // instead of 'INF', and then the result of this op will be erroneous due to the 'NaN'.
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ }
+ else
+ {
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+ ofm_tensor->handle(), act_info);
+ }
- auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
+ _return_fn = asAclFunction(std::move(fn));
+}
- fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto acl_fn = asAclFunction(std::move(fn));
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- _return_fn = std::move(acl_fn);
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ {
+ fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ {
+ fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ default:
+ {
+ std::string err_msg("acl_neon KernelGenerator : " + node.name() +
+ "is not elementwise-binary operations");
+ assert(false && err_msg.c_str());
+ break;
+ }
+ }
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Floor &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ {
+ if (input_tensor->data_type() == output_tensor->data_type())
+ {
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
+ output_tensor->handle());
+ }
+ else
+ {
+ fn = acl_common::generateLayer<arm_compute::NECast>(
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ }
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::FLOOR:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ {
+ fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ {
+ fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
- auto fn = std::make_unique<::arm_compute::NEFloor>();
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
+ "is not supported yet");
+ break;
+ }
+ }
+ _return_fn = asAclFunction(std::move(fn));
+}
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto acl_fn = asAclFunction(std::move(fn));
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- _return_fn = std::move(acl_fn);
+ auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->at(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
const auto activation = node.param().activation;
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hits_tensor = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto keys_tensor = _tensor_builder->at(keys_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
+ auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
- fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Gather &node)
// Converting in reverse order
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto indices_tensor = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
- auto fn = std::make_unique<::arm_compute::NEGatherEx>();
-
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
assert(n == ifm_tensor->num_dimensions());
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// acl_neon doesn't not revert disabling applied dim_correction because acl_neon's kernels would
// use arm_compute::TensorInfo::offset_element_in_bytes()
// It would create an error when the kernel accesses high dimension that its value is 1
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto gamma_tensor = _tensor_builder->at(gamma_index).get();
- auto beta_tensor = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
- auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
- beta_tensor->handle(), epsilon);
+ auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NELogicalAnd>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NELogicalOr>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- // NOTE NEActivationLayer can generate produce erroneous results. it were caused by 'vexpq_f32()'.
- // The neon function returns a value outside of the limit of representation in float as 'NaN'
- // instead of 'INF', and then the result of this op will be errors due to the 'NaN'.
- auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
_return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
- ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
-
- // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NENegLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg);
}
void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
std::vector<arm_compute::ITensor *> inputs;
for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+ inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEStackLayer>();
-
// Disable applied dim_correction
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
{
}
}
- fn->configure(inputs, axis, output);
+ auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
// acl_neon doesn't not revert disabling applied dim_correction because acl_neon's kernels would
// use arm_compute::TensorInfo::offset_element_in_bytes()
auto rank = _ctx.at(input_index).shape().rank();
auto pad_base = _ctx.at(pad_index).data()->base();
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
const auto axis =
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
const auto pixel_value =
::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
- auto fn = std::make_unique<::arm_compute::NEPadLayer>();
- fn->configure(input, output, padding_list, pixel_value);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
_return_fn = asAclFunction(std::move(fn));
}
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_reg, _current_op_seq_layout,
+ acl_common::convertPoolType(node.param().op_type));
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ const auto activation = node.param().activation;
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
void KernelGenerator::visit(const ir::operation::Permute &node)
{
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
// WHCN -> CWHN
pv = arm_compute::PermutationVector{2, 0, 1};
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
{
// CWHN -> WHCN
pv = arm_compute::PermutationVector{1, 2, 0};
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else
{
- auto l = std::make_unique<::arm_compute::NECopy>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto alpha_tensor = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- auto l = std::make_unique<::arm_compute::NEPReluLayer>();
-
- l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
std::unique_ptr<::arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- auto l = std::make_unique<::arm_compute::NEReduceMean>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes,
+ keep_dims, output_tensor->handle());
}
else if (reduce_type == ir::operation::Reduce::ReduceType::SUM)
{
- auto l = std::make_unique<::arm_compute::NEReduceSum>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes,
+ keep_dims, output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEReduceOperation>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
- acl_common::convertReduceType(reduce_type));
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
+ input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+ acl_common::convertReduceType(reduce_type));
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
UNUSED_RELEASE(frontend_layout);
UNUSED_RELEASE(backend_layout);
- auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEScale>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+ auto fn = acl_common::generateLayer<arm_compute::NEScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto weights_tensor = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
- auto copy_layer = std::make_unique<::arm_compute::NECopy>();
- copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::NERNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_tensor->handle(), weights_tensor->handle(),
- recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
- _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
+ auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_tensor->handle(), output_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = input_tensor->layout();
acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
}
- auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+ auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
- auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
-
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
std::vector<arm_compute::ITensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NESplit>();
-
- fn->configure(ifm_tensor->handle(), output_tensors, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
ends_set.set(i, ends[i]);
}
- auto fn = std::make_unique<::arm_compute::NESlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
strides_set.set(i, strides[i]);
}
- auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+ auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
- auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+ invalid_horizontal, invalid_vertical);
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
- tconv_info, invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto &perm{node.param().perm};
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
rank, pv, frontend_layout, backend_layout);
std::unique_ptr<::arm_compute::IFunction> fn;
-
if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
{
- auto l = std::make_unique<::arm_compute::NETranspose>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
+ ofm_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_builder->at(input_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
std::vector<arm_compute::ITensor *> outputs;
for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+ outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEUnstack>();
-
// Disable applied dim_correction
std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_builder->at(output_index);
+ const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
assert(output_rank == output_tensor->num_dimensions());
if (output_rank != output_tensor->info()->num_dimensions())
}
}
- fn->configure(input, outputs, axis);
+ auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEExpLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->at(out_idx).get();
- auto indices_tensor = _tensor_builder->at(indices_idx).get();
- auto depth_tensor = _tensor_builder->at(depth_idx).get();
- auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
- auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();
-
- auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
- fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
- offvalue_tensor->handle(), output_tensor->handle(), axis);
- auto acl_fn = asAclFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
+ auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
+ auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
+ auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+
+ auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+ indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+ offvalue_tensor->handle(), output_tensor->handle(), axis);
+ _return_fn = asAclFunction(std::move(fn));
}
} // namespace acl_neon
#include "ir/Operands.h"
#include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
namespace onert
{
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::Abs &) override;
void visit(const ir::operation::ArgMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Cast &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Dequantize &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::Floor &) override;
void visit(const ir::operation::FullyConnected &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::InstanceNorm &) override;
void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::L2Pool2D &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::LogicalAnd &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pad &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::SQRT &) override;
void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Sub &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
void visit(const ir::operation::OneHot &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
ir::Layout _current_op_seq_layout;
};
#include "ParentInfo.h"
#include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
#include <util/logging.h>
#include "AclSubTensorAnalyzer.h"
using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
operand::NESubTensor>;
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
if (is_linear_executor)
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>();
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
context->external_context());
context->tensor_register = nullptr;
context->optimizer = nullptr;
{
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
std::shared_ptr<ITensorRegister> tensor_register = nullptr,
std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
- kernel_gen, tensor_register, optimizer),
+ : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+ constant_initializer, kernel_gen, tensor_register,
+ optimizer),
_external_context(new ExternalContext)
{
}
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{
// DO NOTHING
}
#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
#include <backend/IConstantInitializer.h>
#include <ir/Operands.h>
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
void visit(const ir::operation::FullyConnected &) override;
private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+ std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
};
} // namespace cpu
#include "KernelGenerator.h"
-#include "ops/AbsLayer.h"
-#include "ops/AddLayer.h"
#include "ops/ArgMinMaxLayer.h"
-#include "ops/AvgPoolLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
-#include "ops/CastLayer.h"
+#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
-#include "ops/CosLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
-#include "ops/DivLayer.h"
#include "ops/EinsumLayer.h"
-#include "ops/ExpLayer.h"
+#include "ops/ElementwiseActivationLayer.h"
+#include "ops/ElementwiseBinaryLayer.h"
+#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
-#include "ops/LogLayer.h"
-#include "ops/LogisticLayer.h"
-#include "ops/MaxLayer.h"
-#include "ops/MaxPoolLayer.h"
#include "ops/MeanLayer.h"
-#include "ops/MinLayer.h"
-#include "ops/MulLayer.h"
-#include "ops/NegLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
+#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/RangeLayer.h"
+#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
-#include "ops/ReLULayer.h"
-#include "ops/ReLU6Layer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
-#include "ops/RoundLayer.h"
-#include "ops/RsqrtLayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
-#include "ops/SinLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
-#include "ops/SubLayer.h"
-#include "ops/TanhLayer.h"
#include "ops/TileLayer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"
-#include "ops/LogicalNotLayer.h"
-#include "ops/ZerosLikeLayer.h"
#include "ops/SquaredDiffLayer.h"
-#include "ops/LogicalOrLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
-#include "ops/QuantizeLayer.h"
#include "ops/StatelessRandomUniformLayer.h"
#include <backend/Backend.h>
namespace
{
+ops::ArithmeticType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+ switch (arithmetic_type_ir)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ return ops::ArithmeticType::kAdd;
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ return ops::ArithmeticType::kSub;
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ return ops::ArithmeticType::kMul;
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ return ops::ArithmeticType::kDiv;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseActivationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ return ops::ElementwiseActivationType::kLogistic;
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ return ops::ElementwiseActivationType::kReLU;
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ return ops::ElementwiseActivationType::kTanh;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseBinaryType
+convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ return ops::ElementwiseBinaryType::kLogicalOr;
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ return ops::ElementwiseBinaryType::kMax;
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ return ops::ElementwiseBinaryType::kMin;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ return ops::ElementwiseUnaryType::kAbs;
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ return ops::ElementwiseUnaryType::kCast;
+ case ir::operation::ElementwiseUnary::Type::COS:
+ return ops::ElementwiseUnaryType::kCos;
+ case ir::operation::ElementwiseUnary::Type::ERF:
+ return ops::ElementwiseUnaryType::kErf;
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ return ops::ElementwiseUnaryType::kExp;
+ case ir::operation::ElementwiseUnary::Type::LOG:
+ return ops::ElementwiseUnaryType::kLog;
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ return ops::ElementwiseUnaryType::kLogicalNot;
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ return ops::ElementwiseUnaryType::kNeg;
+ case ir::operation::ElementwiseUnary::Type::QUANTIZE:
+ return ops::ElementwiseUnaryType::kQuantize;
+ case ir::operation::ElementwiseUnary::Type::ROUND:
+ return ops::ElementwiseUnaryType::kRound;
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ return ops::ElementwiseUnaryType::kRSqrt;
+ case ir::operation::ElementwiseUnary::Type::SIN:
+ return ops::ElementwiseUnaryType::kSin;
+ case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
+ return ops::ElementwiseUnaryType::kZerosLike;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return ops::PoolType::kAvg;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return ops::PoolType::kMax;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
switch (reduce_type_ir)
KernelGenerator::KernelGenerator(
const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
- _external_context(external_context)
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
// DO NOTHING
}
{
assert(!_return_fn_seq);
assert(_tensor_builder->dynamicTensorManager());
- assert(_tensor_builder->tensorRegistry());
+ assert(_tensor_reg);
- auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(
- _ctx, dyn_tensor_manager, _tensor_builder->tensorRegistry());
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
dyn_ctx->op_seq = &op_seq;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
+ dyn_ctx->tensor_registry = _tensor_reg;
dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
{
- auto portable_tensor = _tensor_builder->portableAt(ind);
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
if (portable_tensor)
{
assert(portable_tensor->layout() == ir::Layout::NHWC);
}
- auto tensor = _tensor_builder->at(ind);
+ auto tensor = _tensor_reg->getNativeTensor(ind);
if (tensor)
{
tensor->increase_ref();
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
- auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
- auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
const auto stride = node.param().stride;
const auto activation = node.param().activation;
const auto param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
auto fn = std::make_unique<ops::ConvolutionLayer>();
if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
{
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
- stride.horizontal, stride.vertical, activation, ofm_tensor);
+ stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+ activation, ofm_tensor);
_return_fn = std::move(fn);
return;
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
- activation, ofm_tensor);
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
- auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
- auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
-
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::MaxPoolLayer>();
-
- fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::AvgPoolLayer>();
-
- fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
auto fn = std::make_unique<ops::ConcatLayer>();
const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
+ auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
if (node.getInputs().size() != NNApiInputs)
{
const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
- crops_alloc = _tensor_builder->portableAt(crops_data_index).get();
+ crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
}
fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto value_tensor = _tensor_builder->portableAt(value_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
auto fn = std::make_unique<ops::FillLayer>();
const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
auto bias_tensor =
- bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
+ bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
auto fn = std::make_unique<ops::FullyConnectedLayer>();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
// optional 2nd input
IPortableTensor *shape_tensor = nullptr;
if (node.getInputs().size() == 2)
{
const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
}
auto fn = std::make_unique<ops::ReshapeLayer>();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
// Squeeze can share same kernel with reshape
auto fn = std::make_unique<ops::ReshapeLayer>();
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::SoftMaxLayer>();
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Add &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::AddLayer>();
+ auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
+ convertArithmeticType(node.param().arithmetic_type));
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
auto comparison_type = node.param().comparison_type;
const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
const auto backend_layout = output_tensor->layout();
UNUSED_RELEASE(backend_layout);
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::SubLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MulLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::OneHot &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
- auto depth_tensor = _tensor_builder->portableAt(depth_index).get();
- auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get();
- auto offvalue_tensor = _tensor_builder->portableAt(offvalue_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
+ auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get();
+ auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get();
+ auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get();
assert(indices_tensor->data_type() == OperandType::INT32);
assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::DivLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Einsum &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
const auto equation = node.param().equation;
const auto &operand = _ctx.at(idx);
// TODO make sure using `_current_op_seq_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
- auto in_tensor = _tensor_builder->portableAt(idx);
+ auto in_tensor = _tensor_reg->getPortableTensor(idx);
tensors.emplace_back(in_tensor);
}
};
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto fn = std::make_unique<ops::ExpLayer>();
+ auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
+ convertElementwiseActivationType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ExpandDims &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::ExpandDimsLayer>();
+ auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
- fn->configure(input_tensor, axis_tensor, output_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, output_tensor,
+ convertElementwiseBinaryType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Logistic &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto fn = std::make_unique<ops::LogisticLayer>();
+ auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Tanh &node)
+void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
- auto fn = std::make_unique<ops::TanhLayer>();
+ auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
assert(-rank <= axis && axis < rank);
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
auto fn = std::make_unique<ops::PackLayer>();
assert(rank == 0 || (-rank <= axis && axis < rank));
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
std::vector<IPortableTensor *> output_tensors;
for (auto &output_idx : node.getOutputs())
- output_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::UnpackLayer>();
const auto output_index{node.getOutputs().at(0)};
assert(_ctx.at(pad_index).data());
- auto input = _tensor_builder->portableAt(input_index).get();
- auto output = _tensor_builder->portableAt(output_index).get();
+ auto input = _tensor_reg->getPortableTensor(input_index).get();
+ auto output = _tensor_reg->getPortableTensor(output_index).get();
auto pad_rank = _ctx.at(pad_index).shape().dim(0);
auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MaxLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MinLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cast &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::CastLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::TransposeLayer>();
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
const auto keep_dims = node.param().keep_dims;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axes_tensor = _tensor_builder->portableAt(axes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get();
if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
}
}
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::ReLULayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::ReLU6Layer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Select &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto condition_tensor = _tensor_builder->portableAt(condition_index).get();
- auto true_tensor = _tensor_builder->portableAt(true_index).get();
- auto false_tensor = _tensor_builder->portableAt(false_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get();
+ auto true_tensor = _tensor_reg->getPortableTensor(true_index).get();
+ auto false_tensor = _tensor_reg->getPortableTensor(false_index).get();
auto fn = std::make_unique<ops::SelectLayer>();
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto begins_tensor = _tensor_builder->portableAt(begins_index).get();
- auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto begins_tensor = _tensor_reg->getPortableTensor(begins_index).get();
+ auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get();
auto fn = std::make_unique<ops::SliceLayer>();
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto starts_tensor = _tensor_builder->portableAt(starts_index).get();
- auto ends_tensor = _tensor_builder->portableAt(ends_index).get();
- auto strides_tensor = _tensor_builder->portableAt(strides_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get();
+ auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get();
+ auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get();
auto begin_mask = node.param().begin_mask;
auto end_mask = node.param().end_mask;
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
auto axis_resolved = axis < 0 ? axis + rank : axis;
- auto in_tensor = _tensor_builder->portableAt(input_idx).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::SplitLayer>();
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::AbsLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sin &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::SinLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cos &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::CosLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::RsqrtLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Shape &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
auto fn = std::make_unique<ops::ShapeLayer>();
auto align_corners = node.param().align_corners;
auto half_pixel_centers = node.param().half_pixel_centers;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::ResizeBilinearLayer>();
const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
auto fn = std::make_unique<ops::ReverseLayer>();
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::NegLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Pow &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::PowLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Log &node)
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::LogLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
-void KernelGenerator::visit(const ir::operation::Round &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)};
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto fn = std::make_unique<ops::RoundLayer>();
+ auto fn = std::make_unique<ops::PoolLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
+ convertPoolType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
+void KernelGenerator::visit(const ir::operation::Pow &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::LogicalNotLayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(0)};
- const auto rhs_index{node.getInputs().at(1)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::LogicalOrLayer>();
+ auto fn = std::make_unique<ops::PowLayer>();
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::L2NormLayer>();
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+void KernelGenerator::visit(const ir::operation::Range &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+ const auto start_index{node.getInputs().at(ir::operation::Range::START)};
+ const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
+ const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto start_tensor = _tensor_reg->getPortableTensor(start_index).get();
+ auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get();
+ auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get();
- auto fn = std::make_unique<ops::ZerosLikeLayer>();
+ auto fn = std::make_unique<ops::RangeLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Range &node)
+void KernelGenerator::visit(const ir::operation::Rank &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto start_index{node.getInputs().at(ir::operation::Range::START)};
- const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
- const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
+ const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Rank::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto start_tensor = _tensor_builder->portableAt(start_index).get();
- auto limit_tensor = _tensor_builder->portableAt(limit_index).get();
- auto delta_tensor = _tensor_builder->portableAt(delta_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto fn = std::make_unique<ops::RangeLayer>();
+ auto fn = std::make_unique<ops::RankLayer>();
+
+ fn->configure(ifm_tensor, ofm_tensor);
- fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
auto fn = std::make_unique<ops::SqDiffLayer>();
const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get();
auto fn = std::make_unique<ops::TileLayer>();
const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get();
- auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get();
+ auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get();
auto fn = std::make_unique<ops::MatrixBandPartLayer>();
const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
const auto adj_x = node.param().adj_x;
const auto adj_y = node.param().adj_y;
const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
auto fn = std::make_unique<ops::BroadcastToLayer>();
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
const auto epsilon = node.param().epsilon;
const auto is_training = node.param().is_training;
const auto beta = node.param().beta;
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::LogSoftMaxLayer>();
const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get();
- auto padding_tensor = _tensor_builder->portableAt(padding_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get();
+ auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get();
auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Quantize &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)};
- const auto output_index{node.getOutputs().at(0)};
-
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
-
- auto fn = std::make_unique<ops::QuantizeLayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
const auto output_index{node.getOutputs().at(0)};
auto block_size = node.param().block_size;
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
auto fn = std::make_unique<ops::SpaceToDepthLayer>();
const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto shape_alloc = _tensor_builder->portableAt(shape_index).get();
- auto seed_alloc = _tensor_builder->portableAt(seed_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get();
+ auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get();
auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
- auto in_tensor = _tensor_builder->portableAt(input_idx).get();
- auto in_size_splits = _tensor_builder->portableAt(size_splits).get();
- auto in_split_dim = _tensor_builder->portableAt(split_dim).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
+ auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get();
+ auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get();
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::SplitVLayer>();
#include "ExternalContext.h"
#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Fill &) override;
void visit(const ir::operation::FullyConnected &) override;
void visit(const ir::operation::Squeeze &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Div &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Einsum &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::Custom &node) override;
- void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Max &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Unpack &) override;
void visit(const ir::operation::OneHot &) override;
- void visit(const ir::operation::Cast &) override;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Select &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Abs &) override;
- void visit(const ir::operation::Cos &) override;
- void visit(const ir::operation::Sin &) override;
- void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Shape &) override;
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &) override;
- void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Log &) override;
- void visit(const ir::operation::Round &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Pow &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::ZerosLike &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::Tile &) override;
- void visit(const ir::operation::LogicalOr &) override;
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
+ void visit(const ir::operation::Rank &) override;
void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::BatchMatMul &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::FusedBatchNorm &) override;
void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::Quantize &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::StatelessRandomUniform &) override;
void visit(const ir::operation::SplitV &) override;
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
ir::Layout _current_op_seq_layout;
const std::shared_ptr<ExternalContext> _external_context;
namespace cpu
{
-TensorBuilder::TensorBuilder()
- : _tensor_reg{new cpu_common::TensorRegistry()},
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
_dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
{
assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
const auto tensor_info = _tensor_info_map.at(ind);
- if (!at(ind)->is_dynamic())
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
{
const auto size = tensor_info.total_size();
_static_tensor_mgr->claimPlan(ind, size);
void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
{
- if (!at(ind)->is_dynamic())
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
{
_static_tensor_mgr->releasePlan(ind);
}
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getITensor(ind);
-}
-
-std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getPortableTensor(ind);
-}
-
-bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor)
-{
- return _tensor_reg->setMigrantTensor(ind, tensor);
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-
-std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getNativeTensor(ind);
-}
-
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
{
return std::move(_static_tensor_mgr);
class TensorBuilder : public ITensorBuilder
{
public:
- TensorBuilder();
-
- bool supportDynamicTensor() override { return true; }
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
/**
* @brief Register tensor information to allocate on CPU backend
void allocate() override;
void postFunctionPrepare() override { /* DO NOTHING */}
- /**
- * @brief Get tensor with a specific OperandIndex
- *
- * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise.
- */
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
- /**
- * @brief Get tensor with a specific OperandIndex.
- * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
- * If not, program will crash with assert or exception.
- * @return shared_ptr<Tensor>
- */
- std::shared_ptr<Tensor> at(const ir::OperandIndex &ind);
- std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind);
- bool setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override;
-
- std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
-
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AbsLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void AbsLayer::absFloat32()
-{
- nnfw::cker::Abs(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; }
-
-void AbsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void AbsLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- absFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- absQuant8();
- }
- else
- {
- throw std::runtime_error{"Abs: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-
-#include "backend/IPortableTensor.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AbsLayer : public ::onert::exec::IFunction
-{
-public:
- AbsLayer();
-
-public:
- void absFloat32();
-
- void absQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AddLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void AddLayer::addFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AddLayer::addInt32()
-{
- int32_t output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void AddLayer::addQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- // Parameters for scaled quantized computation
- op_params.left_shift = 20;
- // Zero-points of input and output tensors
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
- assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
- assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
- // Compute normalized scale for _lhs and _rhs values,
- // and represent in 32-bit fixed point
- const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
- const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
- const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
- // output scale is used to normalize final result, so we invert the scale here
- const double real_output_scale =
- norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
- // Represent the scales as fixed int32_t multipliers, and int32_t shifts
- QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
- QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
- QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
- // cker quant8 add is not implemented yet
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AddLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void AddLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- addFloat32();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- addQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- addInt32();
- }
- else
- {
- throw std::runtime_error{"Add: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AddLayer : public ::onert::exec::IFunction
-{
-public:
- AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void addFloat32();
-
- void addQuant8();
-
- void addInt32();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AvgPoolLayer.h"
-
-#include <cker/operation/AveragePool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define AVGPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-AvgPoolLayer::AvgPoolLayer()
- : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
- _activation(ir::Activation::NONE)
-{
- // DO NOTHING
-}
-
-void AvgPoolLayer::averagePoolFloat32()
-{
- AVGPOOLING_PARAMETERS
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-void AvgPoolLayer::averagePoolQuant8()
-{
- AVGPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AvgPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output)
-{
- assert(input != nullptr);
- assert(output != nullptr);
-
- _input = input;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _output = output;
-}
-
-void AvgPoolLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- averagePoolFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- averagePoolQuant8();
- }
- else
- {
- throw std::runtime_error{"AvgPool: unsupported data type"};
- }
-}
-
-#undef AVGPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AvgPoolLayer : public ::onert::exec::IFunction
-{
-public:
- AvgPoolLayer();
-
-public:
- void averagePoolFloat32();
-
- void averagePoolQuant8();
-
- void configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BinaryArithmeticLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T>
+void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+ const bool need_broadcast =
+ nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params);
+ if (need_broadcast)
+ {
+ nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
+ op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ return;
+ }
+
+ nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
+ op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type>
+std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+ switch (lhs->data_type())
+ {
+ case OperandType::FLOAT32:
+ {
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+ return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, op_params);
+ break;
+ }
+ case OperandType::INT32:
+ {
+ int32_t output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ return std::bind(eval<arithmetic_type, int32_t>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, op_params);
+ break;
+ }
+ default:
+ throw std::runtime_error{"BinaryArithmetic(generic): Unsupported data type"};
+ }
+}
+
+void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ // Parameters for scaled quantized computation
+ op_params.left_shift = 20;
+ // Zero-points of input and output tensors
+ op_params.input1_offset = -lhs->data_offset();
+ op_params.input2_offset = -rhs->data_offset();
+ op_params.output_offset = output->data_offset();
+ assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
+ assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
+ assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+
+ // Compute normalized scale for _lhs and _rhs values,
+ // and represent in 32-bit fixed point
+ const double norm_max_scale = 2 * std::max(lhs->data_scale(), rhs->data_scale());
+ const double real_lhs_scale = lhs->data_scale() / norm_max_scale;
+ const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
+ // output scale is used to normalize final result, so we invert the scale here
+ const double real_output_scale =
+ norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+
+ // Represent the scales as fixed int32_t multipliers, and int32_t shifts
+ QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
+ QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
+ QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.input1_offset = -lhs->data_offset();
+ op_params.input2_offset = -rhs->data_offset();
+ op_params.output_offset = output->data_offset();
+
+ double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
+ QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+} // namespace
+
+void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, const ir::Activation activation,
+ const ArithmeticType arithmetic_type)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ switch (arithmetic_type)
+ {
+ case ArithmeticType::kAdd:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kSub:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ op_params.input2_multiplier *= -1;
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kMul:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kDiv:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ throw std::runtime_error{
+ "BinaryArithmetic(Div): Div operation does not support quantization"};
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ throw std::runtime_error{"BinaryArithmetic(Div): Unsupported data type"};
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation,
+ op_params);
+ }
+ break;
+ default:
+ throw std::runtime_error{"BinaryArithmetic: Unsupported BinaryArithmetic type"};
+ }
+}
+
+void BinaryArithmeticLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
namespace ops
{
-class DivLayer : public ::onert::exec::IFunction
+enum class ArithmeticType
+{
+ kAdd,
+ kSub,
+ kMul,
+ kDiv,
+};
+
+class BinaryArithmeticLayer : public ::onert::exec::IFunction
{
public:
- DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ BinaryArithmeticLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
{
// DO NOTHING
}
public:
- void divFloat32();
-
- void divQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
+ void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ const ir::Activation activation, const ArithmeticType arithmetic_type);
void run() override;
const IPortableTensor *_rhs;
IPortableTensor *_output;
- ir::Activation _activation{ir::Activation::NONE};
+ std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CastLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-CastLayer::CastLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CastLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out)
-{
- auto input_shape = getTensorShape(_input);
- auto output_shape = getTensorShape(_output);
- const auto num_elements = MatchingFlatSize(input_shape, output_shape);
-
- std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); });
-}
-
-template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out)
-{
- switch (_output->data_type())
- {
- case ir::DataType::FLOAT32:
- castTensor(in, out.f);
- return;
- case ir::DataType::INT32:
- castTensor(in, out.i32);
- return;
- case ir::DataType::UINT32:
- castTensor(in, out.u32);
- return;
- case ir::DataType::UINT8:
- castTensor(in, out.u8);
- return;
- case ir::DataType::BOOL8:
- castTensor(in, out.b);
- return;
- case ir::DataType::INT64:
- castTensor(in, out.i64);
- return;
- default:
- throw std::runtime_error("Not supported output type" +
- std::to_string((int)_output->data_type()));
- }
-}
-
-void CastLayer::run()
-{
- auto input_buf = _input->buffer();
- auto output_buf = _output->buffer();
- const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
- auto out = *reinterpret_cast<DataPtr *>(&output_buf);
-
- switch (_input->data_type())
- {
- case ir::DataType::FLOAT32:
- castPtr(in.f, out);
- return;
- case ir::DataType::INT32:
- castPtr(in.i32, out);
- return;
- case ir::DataType::UINT32:
- castPtr(in.u32, out);
- return;
- case ir::DataType::UINT8:
- castPtr(in.u8, out);
- return;
- case ir::DataType::BOOL8:
- castPtr(in.b, out);
- return;
- case ir::DataType::INT64:
- castPtr(in.i64, out);
- return;
- default:
- throw std::runtime_error("Cast: unsupported data type" +
- std::to_string((int)_input->data_type()));
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class CastLayer : public ::onert::exec::IFunction
-{
-public:
- CastLayer();
-
-public:
- template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out);
- template <typename FromT> void castPtr(const FromT *in, DataPtr out);
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
ConvolutionLayer::ConvolutionLayer()
: _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
_paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
_conv_kernel(new nnfw::cker::Conv()), _prepare(false)
{
// DO NOTHING
op_params.padding_values.height = _paddingTop;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
nnfw::cker::ConvParams op_params;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
op_params.padding_type = getPaddingType(_paddingType);
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
const uint32_t paddingLeft, const uint32_t paddingRight,
const uint32_t paddingTop, const uint32_t paddingBottom,
const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor,
const ir::Activation activation, IPortableTensor *output)
{
_input = input;
_paddingBottom = paddingBottom;
_strideWidth = strideWidth;
_strideHeight = strideHeight;
+ _dilationWidthFactor = dilationWidthFactor;
+ _dilationHeightFactor = dilationHeightFactor;
_activation = activation;
_output = output;
}
param_padding.param.bottom = _paddingBottom;
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
_paddingLeft = padding.left;
_paddingRight = padding.right;
{
bool is_transposed = false;
kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getPaddingType(_paddingType), is_transposed);
+ getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
+ _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
const IPortableTensor *bias, ir::PaddingType _paddingType,
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const ir::Activation activation,
+ const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor, const ir::Activation activation,
IPortableTensor *output);
void run() override;
uint32_t _strideWidth;
uint32_t _strideHeight;
+ uint32_t _dilationWidthFactor;
+ uint32_t _dilationHeightFactor;
ir::Activation _activation;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CosLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-CosLayer::CosLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CosLayer::cosFloat32()
-{
- nnfw::cker::Cos(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void CosLayer::cosQuant8() { throw std::runtime_error{"NYI"}; }
-
-void CosLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void CosLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- cosFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- cosQuant8();
- }
- else
- {
- throw std::runtime_error{"Cos: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class CosLayer : public ::onert::exec::IFunction
-{
-public:
- CosLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void cosFloat32();
- void cosQuant8();
-
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DivLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void DivLayer::divFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool requires_broadcast = !HaveSameShapes(_lhs, _rhs);
- if (requires_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- }
- else
- {
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- }
-}
-
-void DivLayer::divQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- // op_params.quantized_activation_max = output_activation_max;
- // op_params.quantized_activation_min = output_activation_min;
-
- // cker quant8 div is not implemented yet
- throw std::runtime_error{"Div NYI for quantized"};
-}
-
-void DivLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void DivLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- divFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- divQuant8();
- }
- else
- {
- throw std::runtime_error{"Div: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseActivationLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Logistic.h>
+#include <cker/operation/ReLU.h>
+#include <cker/operation/ReLU6.h>
+#include <cker/operation/Tanh.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ElementwiseActivationLayer::ElementwiseActivationLayer()
+ : _input(nullptr), _output(nullptr), _kernel()
+{
+ // DO NOTHING
+}
+
+void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type)
+{
+ const auto input_scale = static_cast<double>(_input->data_scale());
+ const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
+ const auto output_scale = static_cast<double>(_output->data_scale());
+ const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ float transformed = 0.f;
+ if (op_type == ElementwiseActivationType::kTanh)
+ {
+ transformed = std::tanh(dequantized);
+ }
+ else if (op_type == ElementwiseActivationType::kLogistic)
+ {
+ transformed = 1.0f / (1.0f + std::exp(-dequantized));
+ }
+ else
+ {
+ throw std::runtime_error("ElementwiseActivationLayer : unsupported activation type");
+ }
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
+ }
+}
+
+void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output));
+ const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer());
+ uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer());
+
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = _table[input_data[i]];
+ }
+}
+
+void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ float alpha, float beta,
+ ElementwiseActivationType op_type)
+{
+ _input = input;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseActivationType::kLogistic:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(op_type);
+ _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+ std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::Logistic(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+ }
+ break;
+ case ElementwiseActivationType::kReLU:
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ReLU(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else if (alpha == 6.f && beta == 0.f)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ReLU6(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error(
+ "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(ReLU): unsupported data type"};
+ }
+ break;
+ case ElementwiseActivationType::kTanh:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(op_type);
+ _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+ std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
+ }
+}
+
+void ElementwiseActivationLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ElementwiseActivationLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ElementwiseActivationLAYER_H__
#include <backend/IPortableTensor.h>
namespace ops
{
-class TanhLayer : public ::onert::exec::IFunction
+enum class ElementwiseActivationType
{
-public:
- TanhLayer();
+ kLogistic,
+ kReLU,
+ kTanh
+};
+class ElementwiseActivationLayer : public ::onert::exec::IFunction
+{
public:
- void tanhFloat32();
+ ElementwiseActivationLayer();
- void tanhQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output, float alpha, float beta,
+ const ElementwiseActivationType op_type);
void run() override;
- void PopulateLookupTable();
+ void PopulateLookupTable(const ElementwiseActivationType op_type);
+
+ void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output);
private:
const IPortableTensor *_input;
IPortableTensor *_output;
uint8_t _table[256];
+ std::function<void(const IPortableTensor *input, IPortableTensor *output)> _kernel;
};
} // namespace ops
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseBinaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/LogicalOr.h>
+#include <cker/operation/MaxMin.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::LogicalOrBroadcast<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::LogicalOrElementwise<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ }
+}
+
+template <typename T>
+void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+ nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+ nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+bool haveSameQauntInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ const IPortableTensor *output)
+{
+ return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
+ (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+}
+} // namespace
+
+void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, const ElementwiseBinaryType op_type)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseBinaryType::kLogicalOr:
+ if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalOrGeneric<bool>;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalOr: Unsupported data type"};
+ }
+ break;
+ case ElementwiseBinaryType::kMax:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ if (!haveSameQauntInfo(_lhs, _rhs, _output))
+ {
+ throw std::runtime_error("Max NYI for quantized");
+ }
+ _kernel = maximumGeneric<uint8_t>;
+ }
+ else if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = maximumGeneric<float>;
+ }
+ else
+ {
+ throw std::runtime_error{"Max: unsupported data type"};
+ }
+ break;
+ case ElementwiseBinaryType::kMin:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ if (!haveSameQauntInfo(_lhs, _rhs, _output))
+ {
+ throw std::runtime_error("Min NYI for quantized");
+ }
+ _kernel = minimumGeneric<uint8_t>;
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ _kernel = minimumGeneric<int32_t>;
+ }
+ else if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = minimumGeneric<float>;
+ }
+ else
+ {
+ throw std::runtime_error{"Min: unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ }
+}
+
+void ElementwiseBinaryLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
#include <backend/IPortableTensor.h>
namespace ops
{
-class MaxLayer : public ::onert::exec::IFunction
+enum class ElementwiseBinaryType
+{
+ kLogicalAnd,
+ kLogicalOr,
+ kMax,
+ kMin,
+};
+
+class ElementwiseBinaryLayer : public ::onert::exec::IFunction
{
public:
- MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ ElementwiseBinaryLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
{
// DO NOTHING
}
public:
- template <typename T> void maximum();
-
- void maxQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
+ void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ const ElementwiseBinaryType op_type);
void run() override;
const IPortableTensor *_lhs;
const IPortableTensor *_rhs;
IPortableTensor *_output;
+ std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseUnaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Elementwise.h>
+#include <cker/operation/Erf.h>
+#include <cker/operation/Exp.h>
+#include <cker/operation/LogicalNot.h>
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Round.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+void absFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename FromT>
+void castPtr(const FromT *in, DataPtr out, int num_elements, ir::DataType data_type_out)
+{
+ switch (data_type_out)
+ {
+ case ir::DataType::FLOAT32:
+ std::transform(in, in + num_elements, out.f, [](FromT a) { return static_cast<float>(a); });
+ return;
+ case ir::DataType::INT32:
+ std::transform(in, in + num_elements, out.i32,
+ [](FromT a) { return static_cast<int32_t>(a); });
+ return;
+ case ir::DataType::UINT32:
+ std::transform(in, in + num_elements, out.u32,
+ [](FromT a) { return static_cast<uint32_t>(a); });
+ return;
+ case ir::DataType::UINT8:
+ std::transform(in, in + num_elements, out.u8,
+ [](FromT a) { return static_cast<uint8_t>(a); });
+ return;
+ case ir::DataType::BOOL8:
+ std::transform(in, in + num_elements, out.b, [](FromT a) { return static_cast<bool>(a); });
+ return;
+ case ir::DataType::INT64:
+ std::transform(in, in + num_elements, out.i64,
+ [](FromT a) { return static_cast<int64_t>(a); });
+ return;
+ default:
+ throw std::runtime_error("Cast: Not supported output type" +
+ std::to_string((int)data_type_out));
+ }
+}
+
+void cast(const IPortableTensor *input, IPortableTensor *output)
+{
+ auto input_buf = input->buffer();
+ auto output_buf = output->buffer();
+ const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
+ auto out = *reinterpret_cast<DataPtr *>(&output_buf);
+
+ auto input_shape = getTensorShape(input);
+ auto output_shape = getTensorShape(output);
+ const auto num_elements = MatchingFlatSize(input_shape, output_shape);
+
+ switch (input->data_type())
+ {
+ case ir::DataType::FLOAT32:
+ castPtr(in.f, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::INT32:
+ castPtr(in.i32, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::UINT32:
+ castPtr(in.u32, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::UINT8:
+ castPtr(in.u8, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::BOOL8:
+ castPtr(in.b, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::INT64:
+ castPtr(in.i64, out, num_elements, output->data_type());
+ return;
+ default:
+ throw std::runtime_error("Cast: unsupported data type" +
+ std::to_string((int)input->data_type()));
+ }
+}
+
+void cosFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void expFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void erfFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logicalNot(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+}
+
+void negFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
+ output->data_scale(), output->data_offset());
+}
+
+void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ if (!HaveSameShapes(input, output))
+ throw std::runtime_error{"ZerosLike: input and output shape don't match."};
+
+ auto element_size = getTensorShape(input).FlatSize();
+
+ memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+}
+} // namespace
+
+void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ const ElementwiseUnaryType op_type)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseUnaryType::kAbs:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = absFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Abs: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kCast:
+ _kernel = cast;
+ break;
+ case ElementwiseUnaryType::kCos:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = cosFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Cos: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kExp:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = expFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Exp: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kErf:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = erfFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Erf: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kLog:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = logFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Log: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kLogicalNot:
+ if ((input->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalNot;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalNot: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kNeg:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = negFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Neg: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kQuantize:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = affineQuantize<float, uint8_t>;
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kRound:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = roundFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Round: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kRSqrt:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = rsqrtFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"RSqrt: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kSin:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = sinFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Sin: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kZerosLike:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = zerosLikeFloat32<float>;
+ }
+ else if (input->data_type() == OperandType::INT32)
+ {
+ _kernel = zerosLikeFloat32<int32_t>;
+ }
+ else
+ {
+ throw std::runtime_error{"ZerosLike: Unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error{"ElementwiseUnary: Unsupported ElementwiseUnary type"};
+ }
+}
+
+void ElementwiseUnaryLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
#include <backend/IPortableTensor.h>
namespace ops
{
-class ReLU6Layer : public ::onert::exec::IFunction
+enum class ElementwiseUnaryType
{
-public:
- ReLU6Layer();
+ kAbs,
+ kCast,
+ kCos,
+ kErf,
+ kExp,
+ kLog,
+ kLogicalNot,
+ kNeg,
+ kQuantize,
+ kRound,
+ kRSqrt,
+ kSin,
+ kZerosLike
+};
+class ElementwiseUnaryLayer : public ::onert::exec::IFunction
+{
public:
- void relu6Float32();
+ ElementwiseUnaryLayer() : _input(nullptr), _output(nullptr), _kernel()
+ {
+ // DO NOTHING
+ }
- void relu6Quant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output,
+ const ElementwiseUnaryType op_type);
void run() override;
private:
const IPortableTensor *_input;
IPortableTensor *_output;
+ std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ExpLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Exp.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ExpLayer::expFloat32()
-{
- nnfw::cker::Exp(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ExpLayer::expQuant8()
-{
- // cker quant8 exp is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ExpLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ExpLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- expFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- expQuant8();
- }
- else
- {
- throw std::runtime_error{"Exp: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogLayer::LogLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogLayer::logFloat32()
-{
- nnfw::cker::Log(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogLayer::logQuant8() { throw std::runtime_error{"NYI"}; }
-
-void LogLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void LogLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- logFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- logQuant8();
- }
- else
- {
- throw std::runtime_error{"Log: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogLayer : public ::onert::exec::IFunction
-{
-public:
- LogLayer();
-
-public:
- void logFloat32();
-
- void logQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
// DO NOTHING
}
+void LogSoftMaxLayer::PopulateLookupTable(const float kBeta)
+{
+ const float scale = -_input->data_scale() * kBeta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ _table[max_uint8 - val] = expf(scale * val);
+ }
+}
+
void LogSoftMaxLayer::logsoftmaxFloat32()
{
nnfw::cker::SoftmaxParams op_params;
void LogSoftMaxLayer::logsoftmaxQuant8()
{
- // NYI
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ op_params.axis = _axis;
+ op_params.table = _table;
+ op_params.zero_point = _output->data_offset();
+ op_params.scale = _output->data_scale();
+ nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
}
void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
_output = output;
_beta = beta;
_axis = axis;
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(_beta);
+ }
}
void LogSoftMaxLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- throw std::runtime_error{"LogSoftmax : NYI"};
+ logsoftmaxQuant8();
}
else
{
void run();
+ void PopulateLookupTable(const float kBeta);
+
private:
const IPortableTensor *_input;
IPortableTensor *_output;
float _beta;
int _axis;
+ float _table[256];
};
} // namespace ops
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalNotLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalNot.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogicalNotLayer::LogicalNotLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogicalNotLayer::logicalNotBool8()
-{
- nnfw::cker::LogicalNot(getTensorShape(_input), reinterpret_cast<const bool *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<bool *>(_output->buffer()));
-}
-
-void LogicalNotLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void LogicalNotLayer::run()
-{
- if (_input->data_type() == OperandType::BOOL8)
- {
- logicalNotBool8();
- }
- else
- {
- throw std::runtime_error{"LogicalNot: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogicalNotLayer : public ::onert::exec::IFunction
-{
-public:
- LogicalNotLayer();
-
-public:
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void logicalNotBool8();
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalOrLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalOr.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-void LogicalOrLayer::lorBool8()
-{
- if (!HaveSameShapes(_lhs, _rhs))
- {
- nnfw::cker::LogicalOrBroadcast<bool>(
- getTensorShape(_lhs), reinterpret_cast<const bool *>(_lhs->buffer()), getTensorShape(_rhs),
- reinterpret_cast<const bool *>(_rhs->buffer()), getTensorShape(_output),
- reinterpret_cast<bool *>(_output->buffer()));
- }
- else
- {
- nnfw::cker::LogicalOrElementwise<bool>(getTensorShape(_lhs),
- reinterpret_cast<const bool *>(_lhs->buffer()),
- reinterpret_cast<const bool *>(_rhs->buffer()),
- reinterpret_cast<bool *>(_output->buffer()));
- }
-}
-
-void LogicalOrLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void LogicalOrLayer::run()
-{
- if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
- {
- lorBool8();
- }
- else
- {
- throw std::runtime_error{"LogicalOr: Unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class LogicalOrLayer : public ::onert::exec::IFunction
-{
-public:
- LogicalOrLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // Nothing
- }
-
-public:
- void configure(const IPortableTensor *_lhs, const IPortableTensor *_rhs, IPortableTensor *output);
-
- void run() override;
-
-private:
- void lorBool8();
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogisticLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Logistic.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogisticLayer::populateLookupTable()
-{
- const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
- const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
- const float inverse_scale = 1 / output_scale;
- int32_t maxval = std::numeric_limits<uint8_t>::max();
- int32_t minval = std::numeric_limits<uint8_t>::min();
- for (int32_t val = minval; val <= maxval; ++val)
- {
- const float dequantized = input_scale * (val - input_zero_point);
- const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
- const float rescaled = std::round(transformed * inverse_scale);
- const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
- _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
- }
-}
-
-void LogisticLayer::logisticFloat32()
-{
- nnfw::cker::Logistic(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogisticLayer::logisticQuant8()
-{
- const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
- for (int i = 0; i < size; ++i)
- {
- output_data[i] = _table[input_data[i]];
- }
-}
-
-void LogisticLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-
- if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- if (_output->data_scale() != 1.f / 256)
- {
- throw std::runtime_error{"incorrect scale for output"};
- }
- populateLookupTable();
- }
-}
-
-void LogisticLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- logisticFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- logisticQuant8();
- }
- else
- {
- throw std::runtime_error{"Logistic: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogisticLayer : public ::onert::exec::IFunction
-{
-public:
- LogisticLayer();
-
-public:
- void logisticFloat32();
-
- void logisticQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
- void populateLookupTable();
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-
- uint8_t _table[256];
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MaxLayer::maximum()
-{
- nnfw::cker::Max<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MaxLayer::maxQuant8()
-{
- if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
- {
- if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
- {
- return nnfw::cker::Max<uint8_t>(
- getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- }
- }
- throw std::runtime_error("Max NYI for quantized");
-}
-
-void MaxLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void MaxLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- maximum<float>();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- maxQuant8();
- }
- else
- {
- throw std::runtime_error{"Max: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxPoolLayer.h"
-
-#include <cker/operation/MaxPool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define MAXPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-MaxPoolLayer::MaxPoolLayer()
- : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
- _activation(ir::Activation::NONE)
-{
- // DO NOTHING
-}
-
-void MaxPoolLayer::maxPoolFloat32()
-{
- MAXPOOLING_PARAMETERS
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
-}
-void MaxPoolLayer::maxPoolQuant8()
-{
- MAXPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MaxPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output)
-{
- _input = input;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _output = output;
-}
-
-void MaxPoolLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- maxPoolFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- maxPoolQuant8();
- }
- else
- {
- throw std::runtime_error{"MaxPool: unsupported data type"};
- }
-}
-
-#undef MAXPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MinLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MinLayer::minimum()
-{
- nnfw::cker::Min<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MinLayer::minQuant8()
-{
- if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
- {
- if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
- {
- return nnfw::cker::Min<uint8_t>(
- getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- }
- }
- throw std::runtime_error("Min NYI for quantized");
-}
-
-void MinLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void MinLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- minimum<float>();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- minQuant8();
- }
- else if (_lhs->data_type() == OperandType::INT32)
- {
- minimum<int32_t>();
- }
- else
- {
- throw std::runtime_error{"Min: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MinLayer : public ::onert::exec::IFunction
-{
-public:
- MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- template <typename T> void minimum();
-
- void minQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MulLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void MulLayer::mulFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void MulLayer::mulQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
-
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
-
- double real_multiplier = _lhs->data_scale() * _rhs->data_scale() / _output->data_scale();
- QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void MulLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- mulFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- mulQuant8();
- }
- else
- {
- throw std::runtime_error{"Mul: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MulLayer : public ::onert::exec::IFunction
-{
-public:
- MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void mulFloat32();
-
- void mulQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NegLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-NegLayer::NegLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void NegLayer::negFloat32()
-{
- nnfw::cker::Neg(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void NegLayer::negQuant8() { throw std::runtime_error{"NYI"}; }
-
-void NegLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void NegLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- negFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- negQuant8();
- }
- else
- {
- throw std::runtime_error{"Neg: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class NegLayer : public ::onert::exec::IFunction
-{
-public:
- NegLayer();
-
-public:
- void negFloat32();
-
- void negQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PoolLayer.h"
+
+#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void avgPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ nnfw::cker::AveragePool<T>(params, getTensorShape(input),
+ reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void maxPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ nnfw::cker::MaxPool<T>(params, getTensorShape(input),
+ reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+std::function<void(const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const nnfw::cker::PoolParams ¶ms, PoolType op_type)
+{
+ if (op_type == PoolType::kAvg)
+ {
+ return std::bind(&avgPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (op_type == PoolType::kMax)
+ {
+ return std::bind(&maxPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+ }
+ else
+ {
+ throw std::runtime_error{"Pool: unsupported pool type"};
+ }
+}
+} // namespace
+
+PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel()
+{
+ // DO NOTHING
+}
+
+#define POOLING_PARAMETERS \
+ nnfw::cker::PoolParams op_params; \
+ op_params.stride_height = strideHeight; \
+ op_params.stride_width = strideWidth; \
+ op_params.filter_height = kernelHeight; \
+ op_params.filter_width = kernelWidth; \
+ op_params.padding_values.height = (int8_t)paddingTop; \
+ op_params.padding_values.width = (int8_t)paddingLeft;
+
+void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t,
+ const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ IPortableTensor *output, const PoolType op_type)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ POOLING_PARAMETERS
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ float output_activation_min = 0;
+ float output_activation_max = 0;
+ CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ _kernel = generateKernelGeneric<float>(op_params, op_type);
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
+ }
+ else
+ {
+ throw std::runtime_error{"Pool: unsupported data type"};
+ }
+}
+
+void PoolLayer::run() { _kernel(_input, _output); }
+
+#undef AVGPOOLING_PARAMETERS
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
namespace ops
{
-class MaxPoolLayer : public ::onert::exec::IFunction
+enum class PoolType
{
-public:
- MaxPoolLayer();
+ kAvg,
+ kL2,
+ kMax,
+};
+class PoolLayer : public ::onert::exec::IFunction
+{
public:
- void maxPoolFloat32();
-
- void maxPoolQuant8();
+ PoolLayer();
+public:
void configure(const IPortableTensor *input, const uint32_t paddingLeft,
const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth,
const uint32_t strideHeight, const uint32_t kernelWidth,
const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output);
+ IPortableTensor *output, const PoolType op_type);
void run() override;
const IPortableTensor *_input;
IPortableTensor *_output;
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
+ std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "QuantizeLayer.h"
-
-#include <cker/operation/Quantize.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize()
-{
- nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()),
- _output->data_scale(), _output->data_offset());
-}
-
-void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void QuantizeLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- affineQuantize<float, uint8_t>();
- }
- else
- {
- throw std::runtime_error{"Quantize: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class QuantizeLayer : public ::onert::exec::IFunction
-{
-public:
- QuantizeLayer();
-
-public:
- template <typename InputT, typename OutputT> void affineQuantize();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
* limitations under the License.
*/
-#include "RoundLayer.h"
+#include "RankLayer.h"
#include "OperationUtils.h"
-#include <cker/operation/Round.h>
-
namespace onert
{
namespace backend
{
namespace ops
{
-RoundLayer::RoundLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-void RoundLayer::roundFloat32()
+RankLayer::RankLayer() : _input(nullptr), _output(nullptr)
{
- nnfw::cker::Round(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ // DO NOTHING
}
-void RoundLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output)
{
_input = input;
_output = output;
}
-void RoundLayer::run()
+void RankLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
+ if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32)
{
- roundFloat32();
+ int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer());
+ output_data[0] = _input->num_dimensions();
}
else
{
- throw std::runtime_error{"Round: unsupported data type"};
+ throw std::runtime_error{"Rank : unsupported data type"};
}
}
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
#include <backend/IPortableTensor.h>
{
namespace ops
{
-class ZerosLikeLayer : public ::onert::exec::IFunction
+
+class RankLayer : public ::onert::exec::IFunction
{
public:
- ZerosLikeLayer();
+ RankLayer();
+public:
void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLU6Layer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU6.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ReLU6Layer::relu6Float32()
-{
- nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLU6Layer::relu6Quant8()
-{
- // cker quant8 relu is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ReLU6Layer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- relu6Float32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- relu6Quant8();
- }
- else
- {
- throw std::runtime_error{"ReLU6: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLULayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLULayer::ReLULayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ReLULayer::reluFloat32()
-{
- nnfw::cker::ReLU(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLULayer::reluQuant8()
-{
- // cker quant8 relu is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ReLULayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ReLULayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- reluFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- reluQuant8();
- }
- else
- {
- throw std::runtime_error{"ReLU: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
}
template <typename T>
-void evalType(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes,
- bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (reduce_type)
{
case ReduceType::kSum:
- return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel,
- [](const T current, const T in) -> T { return in + current; });
+ return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, static_cast<T>(0), reduce_kernel,
+ [](const T current, const T in) -> T { return in + current; });
break;
case ReduceType::kProd:
- return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel,
- [](const T current, const T in) -> T { return in * current; });
+ return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, static_cast<T>(1), reduce_kernel,
+ [](const T current, const T in) -> T { return in * current; });
break;
case ReduceType::kMax:
- return evalLogic<T>(
- input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
+ return std::bind(
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
[](const T current, const T in) -> T { return (in > current) ? in : current; });
break;
case ReduceType::kMin:
- return evalLogic<T>(
- input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
+ return std::bind(
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
[](const T current, const T in) -> T { return (in < current) ? in : current; });
break;
default:
// Template specialization for bool type
template <>
-void evalType<bool>(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel,
- ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType<bool>(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (reduce_type)
{
case ReduceType::kAny:
- return evalLogic<bool>(
- input, output, axes, keep_dims, false, reduce_kernel,
- [](const bool current, const bool in) -> bool { return in || current; });
+ return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, false, reduce_kernel,
+ [](const bool current, const bool in) -> bool { return in || current; });
break;
case ReduceType::kAll:
- return evalLogic<bool>(
- input, output, axes, keep_dims, true, reduce_kernel,
- [](const bool current, const bool in) -> bool { return in && current; });
+ return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, true, reduce_kernel,
+ [](const bool current, const bool in) -> bool { return in && current; });
break;
default:
throw std::runtime_error{"Reduce: Unsupported reduce type"};
}
}
-template <ReduceType reduce_type>
-void evalGeneric(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+generateKernelGeneric(const IPortableTensor *input, bool keep_dims,
+ nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (input->data_type())
{
case OperandType::FLOAT32:
- return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<float>(keep_dims, reduce_kernel, reduce_type);
case OperandType::INT32:
- return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<int32_t>(keep_dims, reduce_kernel, reduce_type);
case OperandType::BOOL8:
- return evalType<bool>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<bool>(keep_dims, reduce_kernel, reduce_type);
default:
throw std::runtime_error{"Reduce(generic): unsupported data type"};
}
}
+// TODO Refine this function
void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
const std::vector<int> &axes, bool keep_dims,
nnfw::cker::Reduce &reduce_kernel)
return;
}
- evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel);
+ const auto kernel = generateKernelGeneric(input, keep_dims, reduce_kernel, ReduceType::kSum);
+ kernel(input, output, axes);
}
} // namespace
ReduceLayer::ReduceLayer()
- : _input(nullptr), _axes(nullptr), _output(nullptr), _reduceType(ReduceType::kAny),
- _keep_dims(false), _reduce_kernel(new nnfw::cker::Reduce())
+ : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
+ _kernel()
{
// DO NOTHING
}
_input = input;
_axes = axes;
_output = output;
- _reduceType = reduceType;
- _keep_dims = keep_dims;
-}
-void ReduceLayer::run()
-{
- const auto axes = getReducerAxes(_axes);
- switch (_reduceType)
+ switch (reduceType)
{
case ReduceType::kSum:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = std::bind(&evalSumQuantized, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, *_reduce_kernel);
return;
}
- evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kSum);
break;
case ReduceType::kProd:
- evalGeneric<ReduceType::kProd>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kProd);
break;
case ReduceType::kMax:
- evalGeneric<ReduceType::kMax>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMax);
break;
case ReduceType::kMin:
- evalGeneric<ReduceType::kMin>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMin);
break;
case ReduceType::kAny:
- evalGeneric<ReduceType::kAny>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAny);
break;
case ReduceType::kAll:
- evalGeneric<ReduceType::kAll>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll);
break;
default:
throw std::runtime_error{"ReduceSum: Unsupported reduce type"};
}
}
+void ReduceLayer::run()
+{
+ const auto axes = getReducerAxes(_axes);
+ _kernel(_input, _output, axes);
+}
+
} // namespace ops
} // namespace cpu
} // namespace backend
const IPortableTensor *_input;
const IPortableTensor *_axes;
IPortableTensor *_output;
- ReduceType _reduceType;
- bool _keep_dims;
std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel;
+ std::function<void(const IPortableTensor *input, IPortableTensor *output,
+ const std::vector<int> &axes)>
+ _kernel;
};
} // namespace ops
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RoundLayer : public ::onert::exec::IFunction
-{
-public:
- RoundLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void roundFloat32();
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "RsqrtLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void RsqrtLayer::rsqrtFloat32()
-{
- nnfw::cker::Rsqrt(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI : QASYMM8 not supported"}; }
-
-void RsqrtLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void RsqrtLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- rsqrtFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- rsqrtQuant8();
- }
- else
- {
- throw std::runtime_error{"Rsqrt: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RsqrtLayer : public ::onert::exec::IFunction
-{
-public:
- RsqrtLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void rsqrtFloat32();
- void rsqrtQuant8();
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SinLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-SinLayer::SinLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void SinLayer::sinFloat32()
-{
- nnfw::cker::Sin(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; }
-
-void SinLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void SinLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- sinFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- sinQuant8();
- }
- else
- {
- throw std::runtime_error{"Sin: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class SinLayer : public ::onert::exec::IFunction
-{
-public:
- SinLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void sinFloat32();
- void sinQuant8();
-
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
// DO NOTHING
}
-// Performs softmax along the input of size (input_size * batch_size).
-void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
- float *out)
+void SoftMaxLayer::softmaxFloat32()
{
- assert(input_size > 0);
-
- // For each batch
- for (int b = 0; b < batch_size; b++)
+ if (getNumberOfDimensions(_input) == 1)
{
- // Find the max coeff.
- float max_coeff = in[0];
- for (int i = 1; i < input_size; i++)
- {
- if (in[i] > max_coeff)
- max_coeff = in[i];
- }
-
- // Compute the normalized sum of exps.
- float exp_sum = 0.0;
- for (int i = 0; i < input_size; i++)
- {
- out[i] = std::exp((in[i] - max_coeff) * beta);
- exp_sum += out[i];
- }
-
- // Divide by the sum of exps.
- float reciprocal_sum_exp = 1.f / exp_sum;
- for (int i = 0; i < input_size; i++)
- {
- out[i] *= reciprocal_sum_exp;
- }
-
- // Advance in and out pointers for the next batch.
- in += input_size;
- out += input_size;
+ uint32_t input_size = getNumberOfElements(_input);
+ nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta,
+ reinterpret_cast<float *>(_output->buffer()));
}
-}
-
-void SoftMaxLayer::softmaxFloat32()
-{
- if (getNumberOfDimensions(_input) == 2)
+ else if (getNumberOfDimensions(_input) == 2)
{
uint32_t batch_size = getSizeOfDimension(_input, 0);
if (batch_size == 0)
throw std::runtime_error("batch_size should not be 0");
uint32_t input_size = getNumberOfElements(_input) / batch_size;
- Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta,
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size,
+ _beta, reinterpret_cast<float *>(_output->buffer()));
}
else if (getNumberOfDimensions(_input) == 4)
{
}
else
{
- throw std::runtime_error{"only 2D and 4D tensors supported"};
+ throw std::runtime_error{"only 1D, 2D and 4D tensors supported"};
}
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SubLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void SubLayer::subFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SubLayer::subInt32()
-{
- int32_t output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void SubLayer::subQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- // Parameters for scaled quantized computation
- op_params.left_shift = 20;
- // Zero-points of input and output tensors
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
- assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
- assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
- // Compute normalized scale for _lhs and _rhs values,
- // and represent in 32-bit fixed point
- const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
- const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
- const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
- // output scale is used to normalize final result, so we invert the scale here
- const double real_output_scale =
- norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
- // Represent the scales as fixed int32_t multipliers, and int32_t shifts
- QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
- QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
- op_params.input2_multiplier *= -1;
- QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void SubLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void SubLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- subFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- subQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- subInt32();
- }
- else
- {
- throw std::runtime_error{"Sub: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class SubLayer : public ::onert::exec::IFunction
-{
-public:
- SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void subFloat32();
-
- void subQuant8();
-
- void subInt32();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TanhLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Tanh.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void TanhLayer::PopulateLookupTable()
-{
- const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
- const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
- const float inverse_scale = 1 / output_scale;
- int32_t maxval = std::numeric_limits<uint8_t>::max();
- int32_t minval = std::numeric_limits<uint8_t>::min();
- for (int32_t val = minval; val <= maxval; ++val)
- {
- const float dequantized = input_scale * (val - input_zero_point);
- const float transformed = std::tanh(dequantized);
- const float rescaled = std::round(transformed * inverse_scale);
- const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
- _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
- }
-}
-
-void TanhLayer::tanhFloat32()
-{
- nnfw::cker::Tanh(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void TanhLayer::tanhQuant8()
-{
- const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
- for (int i = 0; i < size; ++i)
- {
- output_data[i] = _table[input_data[i]];
- }
-}
-
-void TanhLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
- if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- PopulateLookupTable();
- }
-}
-
-void TanhLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- tanhFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- tanhQuant8();
- }
- else
- {
- throw std::runtime_error{"Tanh: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ZerosLikeLayer.h"
-
-#include "OperationUtils.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-ZerosLikeLayer::ZerosLikeLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ZerosLikeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ZerosLikeLayer::run()
-{
- if (!HaveSameShapes(_input, _output))
- throw std::runtime_error{"ZerosLike: input and output shape don't match."};
-
- auto element_size = getTensorShape(_input).FlatSize();
-
- switch (_input->data_type())
- {
- case OperandType::FLOAT32:
- memset(reinterpret_cast<float *>(_output->buffer()), 0, element_size * sizeof(float));
- break;
- case OperandType::INT32:
- memset(reinterpret_cast<int32_t *>(_output->buffer()), 0, element_size * sizeof(int32_t));
- break;
- default:
- throw std::runtime_error{"ZerosLike: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
class IConstantInitializer;
class IKernelGenerator;
class ITensorRegister;
+struct ITensorRegistry;
struct ITensorBuilder;
struct IOptimizer;
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
std::shared_ptr<ITensorRegister> tensor_register = nullptr,
std::shared_ptr<IOptimizer> optimizer = nullptr)
- : _backend{backend}, _graph{graph}, tensor_builder{tensor_builder},
- constant_initializer{constant_initializer}, kernel_gen{kernel_gen},
- tensor_register{tensor_register}, optimizer{optimizer}
+ : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry},
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer}
{
}
std::vector<ir::OperandIndex> _operand_list;
public:
+ std::shared_ptr<ITensorRegistry> tensor_registry;
std::shared_ptr<ITensorBuilder> tensor_builder;
std::shared_ptr<IConstantInitializer> constant_initializer;
std::shared_ptr<IKernelGenerator> kernel_gen;
public:
void run()
{
- assert(tensor_builder().get());
+ assert(tensor_registry());
for (const auto &it : _init_map)
{
const auto &ind = it.first;
const auto &fn = it.second;
const auto &model_obj = _operands.at(ind);
- auto tensor_obj = tensor_builder()->tensorAt(ind);
+ auto tensor_obj = tensor_registry()->getNativeITensor(ind);
assert(tensor_obj != nullptr);
fn(model_obj, *tensor_obj);
VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl;
void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; }
protected:
- using OperationVisitor::visit;
-
-protected:
- virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
+ virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0;
public:
virtual void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
virtual ~ITensorBuilder(void) = default;
/**
- * @brief Returns true if this TensorBuilder support dynamic tensor
- */
- virtual bool supportDynamicTensor() = 0;
-
- /**
* @brief Register tensor information to allocate on backend
*
* @param ind Index
*/
virtual bool isRegistered(const ir::OperandIndex &) const = 0;
- /**
- * @brief Get tensor registry
- *
- * @return std::shared_ptr<backend::ITensorRegistry> tensor registry object
- *
- * @note Backend should implement this when it has StaticTensorManager and DynamicTensorManager
- */
- virtual std::shared_ptr<backend::ITensorRegistry> tensorRegistry() = 0;
-
public: // methods for static tensor allocation
/**
* @brief Let the tensor builder know first use(start of lifetime) of a tensor
virtual void postFunctionPrepare() = 0;
/**
- * @brief Get the tensor object
- *
- * @param ind Index of the tensor
- * @return std::shared_ptr<ITensor> The tensor object
- */
- virtual std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) = 0;
-
- /**
- * @brief Set the migrant tensor object
- *
- * @return true if succeeded
- * @return false if failed or unsupported
- */
- virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
- {
- return false;
- }
-
- /**
- * @brief Iterate over tensors
- *
- * @param fn The function to be run
- */
- virtual void iterate(const IterateFunction &fn) = 0;
-
- /**
* @brief Release static @c ITensorManger object which was built
* Before calling this, @c allocate must have been called
*
* @note Since it is a pointer, its life time is from the cration of TensorBuilder
* to the end of execution
*/
- virtual IDynamicTensorManager *dynamicTensorManager(void)
- {
- throw std::runtime_error("dynamicTensorManager(): NYI");
- }
+ virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; }
/**
* @brief Release dynamic @c ITensorManger object which was built
*
* @return std::unique_ptr<ITensorManager> Tensor Manager object
*/
- virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void)
- {
- throw std::runtime_error("releaseDynamicTensorManager() for this backend is not supported");
- }
+ virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) { return nullptr; }
};
} // namespace backend
#include "ir/Index.h"
#include "backend/ITensor.h"
+#include "backend/IPortableTensor.h"
namespace onert
{
* @note Returned tensor cannot be used longer than dynamic tensor manager
*/
virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0;
+ /**
+ * @brief Set the Migrant Tensor which is from another backend
+ *
+ * @return true if supported
+ * @return false if not supported
+ */
+ virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
+ {
+ return false;
+ }
};
} // namespace backend
} // namespace onert
#include "ir/OperandIndexMap.h"
-#include "backend/IPortableTensor.h"
namespace onert
{
return nullptr;
}
- bool setMigrantTensor(const ir::OperandIndex &ind, const std::shared_ptr<IPortableTensor> &tensor)
+ bool setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor) override
{
- // TODO Uncomment this as two tensors for an index is not allowed.
- // But now it is temporarily allowed as a workaround. External one hides Managed one.
- // auto itr = _native.find(ind);
- // if (itr != _native.end() && itr->second != nullptr && tensor != nullptr)
- // throw std::runtime_error{
- // "Tried to set an migrant tensor but an native tensor already exists."};
+ assert(tensor != nullptr);
+ auto itr = _native.find(ind);
+ if (itr != _native.end())
+ throw std::runtime_error{"Tried to set a migrant tensor but a native tensor already exists."};
_migrant[ind] = tensor;
return true;
}
void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
{
+ assert(tensor != nullptr);
auto itr = _migrant.find(ind);
- if (itr != _migrant.end() && itr->second != nullptr && tensor != nullptr)
- throw std::runtime_error{
- "Tried to set a native tensor but an migrant tensor already exists."};
+ if (itr != _migrant.end())
+ throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."};
_native[ind] = tensor;
}
#include "MemoryManager.h"
#include "backend/IStaticTensorManager.h"
+#include "backend/IDynamicTensorManager.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "TensorRegistry.h"
class StaticTensorManager : public backend::IStaticTensorManager
{
public:
- StaticTensorManager(const std::shared_ptr<TensorRegistry> ®);
+ StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
+ IDynamicTensorManager *dynamic_tensor_manager);
virtual ~StaticTensorManager() = default;
void allocateConsts(void);
std::unique_ptr<MemoryManager> _nonconst_mgr;
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
+ IDynamicTensorManager *_dynamic_tensor_manager;
};
} // namespace cpu_common
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_LOWERED_GRAPH_H__
+#define __ONERT_COMPILER_LOWERED_GRAPH_H__
+
+#include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "ir/OpSequences.h"
+#include "compiler/BackendResolver.h"
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class that contains lowering information on graph.
+ * In addition, after lowering, operands in graph will be set to "dynamic"
+ * if the shape of output of an operation cannot be decided at compilation time.
+ */
+class LoweredGraph
+{
+public:
+ LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
+
+ ir::Graph &graph() { return _graph; }
+ const ir::Graph &graph() const { return _graph; }
+ const ir::LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
+ const ir::operation::LowerInfo *getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const;
+ void setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+ std::unique_ptr<ir::operation::LowerInfo> &&lower_info);
+ void removeLowerInfo(const ir::OpSequenceIndex &op_seq_index);
+ const ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index) const;
+ ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index);
+ void setLowerInfo(const ir::OperandIndex &index,
+ std::unique_ptr<ir::operand::LowerInfo> &&lower_info);
+ void removeLowerInfo(const ir::OperandIndex &index);
+ ir::OpSequences &op_seqs() { return _op_seqs; }
+ const ir::OpSequences &op_seqs() const { return _op_seqs; }
+ void iterateTopolOpSeqs(
+ const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const;
+ void
+ iterateTopolOpSeqs(const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn);
+ const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
+ const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
+
+private:
+ void
+ makeOpSequences(ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ const compiler::CompilerOptions &options,
+ const compiler::BackendResolver &backend_resolver);
+
+ void manipulateLowerInfo(
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ bool is_primary);
+ void dumpLowerInfo();
+ bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
+ ir::Layout layout, const compiler::BackendResolver &backend_resolver);
+ ir::OpSequenceIndex appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+ const ir::Operation &node);
+
+private:
+ ir::Graph _graph;
+ backend::BackendContexts _backend_contexts;
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+ ir::LowerInfoMap _lower_info_map;
+ // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member
+ ir::OpSequences _op_seqs;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_LOWERED_GRAPH_H__
#include "ir/OperationVisitor.h"
#include "ir/OpSequence.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include "ir/Index.h"
#include <memory>
public:
StaticShapeInferer(
const ir::SubgraphIndex &subg_idx,
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &lowered_subgs)
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &lowered_subgs)
: _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
_operations(lowered_subgs.at(subg_idx)->graph().operations()),
_return_has_dynamic_tensor(false)
* @param op_seq sequence of operations
* @return @c true if op_seq's input or output has any dynamic tensor; @c false otherwise.
*/
- bool infer(const ir::OpSequence &op_seq)
- {
- bool has_dynamic_tensor = false;
-
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
-
- for (const auto &operation_idx : op_seq.operations())
- {
- _operations.at(operation_idx).accept(*this);
-
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
- }
-
- return has_dynamic_tensor;
- }
+ bool infer(const ir::OpSequence &op_seq);
void dump();
private:
+ bool checkDynamicInput(const ir::Operation &op);
+ void setDynamicOutput(const ir::Operation &op);
+
+private:
// TODO Define visitors for operations. List them in alphabetic order.
- void visit(const ir::operation::Abs &op) override;
- void visit(const ir::operation::Add &op) override;
void visit(const ir::operation::ArgMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BinaryArithmetic &op) override;
void visit(const ir::operation::BroadcastTo &op) override;
- void visit(const ir::operation::Cast &op) override;
void visit(const ir::operation::Comparison &op) override;
void visit(const ir::operation::Concat &op) override;
void visit(const ir::operation::Conv2D &op) override;
- void visit(const ir::operation::Cos &op) override;
- void visit(const ir::operation::Div &op) override;
- void visit(const ir::operation::Exp &op) override;
+ void visit(const ir::operation::ElementwiseActivation &op) override;
+ void visit(const ir::operation::ElementwiseBinary &op) override;
+ void visit(const ir::operation::ElementwiseUnary &op) override;
void visit(const ir::operation::ExpandDims &op) override;
void visit(const ir::operation::Fill &op) override;
void visit(const ir::operation::FullyConnected &op) override;
void visit(const ir::operation::FusedBatchNorm &op) override;
void visit(const ir::operation::Gather &op) override;
void visit(const ir::operation::If &op) override;
- void visit(const ir::operation::Log &op) override;
- void visit(const ir::operation::LogicalNot &op) override;
- void visit(const ir::operation::LogicalOr &op) override;
- void visit(const ir::operation::Logistic &op) override;
void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
- void visit(const ir::operation::Max &op) override;
- void visit(const ir::operation::Min &op) override;
- void visit(const ir::operation::Mul &op) override;
- void visit(const ir::operation::Neg &op) override;
void visit(const ir::operation::OneHot &op) override;
void visit(const ir::operation::Pack &op) override;
void visit(const ir::operation::Pad &op) override;
void visit(const ir::operation::Range &op) override;
void visit(const ir::operation::Reduce &op) override;
void visit(const ir::operation::Reshape &op) override;
- void visit(const ir::operation::Round &op) override;
- void visit(const ir::operation::RSQRT &op) override;
void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
- void visit(const ir::operation::Sin &op) override;
void visit(const ir::operation::Slice &op) override;
void visit(const ir::operation::Softmax &op) override;
void visit(const ir::operation::SpaceToBatchND &op) override;
void visit(const ir::operation::Split &op) override;
void visit(const ir::operation::Squeeze &op) override;
void visit(const ir::operation::StridedSlice &op) override;
- void visit(const ir::operation::Sub &op) override;
void visit(const ir::operation::SquaredDifference &op) override;
- void visit(const ir::operation::Tanh &op) override;
void visit(const ir::operation::Tile &op) override;
void visit(const ir::operation::Transpose &op) override;
void visit(const ir::operation::Unpack &op) override;
void visit(const ir::operation::While &op) override;
- void visit(const ir::operation::ZerosLike &op) override;
private:
/**
void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
private:
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &_lowered_subgs;
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &_lowered_subgs;
// _operands and _operations can be changed by controlflow operation
ir::Operands &_operands; // operands of current subgraph
ir::Operations &_operations; // operations of current subgraph
class DynamicShapeInferer : public ir::OperationVisitor
{
public:
- DynamicShapeInferer(const ir::Operands &operands, backend::IDynamicTensorManager *tensor_manager,
+ DynamicShapeInferer(const ir::Operands &operands,
const std::shared_ptr<backend::ITensorRegistry> &tensor_registry)
- : _operands(operands), _dynamic_tensor_manager(tensor_manager),
- _tensor_registry(tensor_registry)
+ : _operands(operands), _tensor_registry(tensor_registry)
{
UNUSED_RELEASE(_operands);
- UNUSED_RELEASE(_dynamic_tensor_manager);
UNUSED_RELEASE(_tensor_registry);
}
public:
// TODO Define visitors for operations. List them in alphabetic order.
// Remove TODO when any op starting from the alphabet is added
- void visit(const ir::operation::Abs &op) override;
- void visit(const ir::operation::Add &op) override;
void visit(const ir::operation::ArgMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BinaryArithmetic &op) override;
void visit(const ir::operation::BroadcastTo &op) override;
- void visit(const ir::operation::Cast &op) override;
void visit(const ir::operation::Comparison &op) override;
void visit(const ir::operation::Concat &op) override;
void visit(const ir::operation::Conv2D &op) override;
- void visit(const ir::operation::Cos &op) override;
- void visit(const ir::operation::Div &op) override;
- void visit(const ir::operation::Exp &op) override;
+ void visit(const ir::operation::ElementwiseActivation &op) override;
+ void visit(const ir::operation::ElementwiseBinary &op) override;
+ void visit(const ir::operation::ElementwiseUnary &op) override;
void visit(const ir::operation::ExpandDims &op) override;
void visit(const ir::operation::Fill &op) override;
void visit(const ir::operation::FullyConnected &op) override;
void visit(const ir::operation::FusedBatchNorm &op) override;
void visit(const ir::operation::Gather &op) override;
- void visit(const ir::operation::Log &op) override;
- void visit(const ir::operation::LogicalNot &op) override;
- void visit(const ir::operation::LogicalOr &op) override;
- void visit(const ir::operation::Logistic &op) override;
void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
- void visit(const ir::operation::Max &op) override;
- void visit(const ir::operation::Min &op) override;
- void visit(const ir::operation::Mul &op) override;
- void visit(const ir::operation::Neg &op) override;
void visit(const ir::operation::OneHot &op) override;
void visit(const ir::operation::Pack &op) override;
void visit(const ir::operation::Pad &op) override;
void visit(const ir::operation::Range &op) override;
void visit(const ir::operation::Reduce &op) override;
void visit(const ir::operation::Reshape &op) override;
- void visit(const ir::operation::Round &op) override;
- void visit(const ir::operation::RSQRT &op) override;
void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
- void visit(const ir::operation::Sin &op) override;
void visit(const ir::operation::Slice &op) override;
void visit(const ir::operation::Softmax &op) override;
void visit(const ir::operation::SpaceToBatchND &op) override;
void visit(const ir::operation::Split &op) override;
void visit(const ir::operation::Squeeze &op) override;
void visit(const ir::operation::StridedSlice &op) override;
- void visit(const ir::operation::Sub &op) override;
void visit(const ir::operation::SquaredDifference &op) override;
- void visit(const ir::operation::Tanh &op) override;
void visit(const ir::operation::Tile &op) override;
void visit(const ir::operation::Transpose &op) override;
void visit(const ir::operation::Unpack &op) override;
// TODO write op starting from V
- void visit(const ir::operation::ZerosLike &op) override;
private:
/**
*/
const ir::Operands &_operands;
/**
- * @brief To allocate memory for output tensor if needed
- */
- // TODO Remove this, as it is no longer used
- backend::IDynamicTensorManager *_dynamic_tensor_manager;
- /**
* @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer()
*/
std::shared_ptr<backend::ITensorRegistry> _tensor_registry;
{
/// @brief index of input tensor whose memory needs to be allocated at execution time
ir::OperandIndex ind;
- /// @brief dynamic tensor manager that can allocate memory when input tensor is dynamic
- backend::IDynamicTensorManager *dyn_tensor_manager;
};
using DynAllocInfoMap = std::unordered_map<std::shared_ptr<backend::ITensor>, DynAllocInfo>;
{
std::vector<std::unique_ptr<InputDesc>> inputs;
std::vector<std::unique_ptr<OutputDesc>> outputs;
- // Contains shape of input set by set_input_tensorinfo
- std::unordered_map<ir::IOIndex, ir::Shape> input_shape_signature;
+ // Contains shape of input set by nnfw_set_input_tensorinfo(..)
+ std::unordered_map<ir::IOIndex, ir::Shape> dynamic_input_shapes;
};
} // namespace exec
OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
OperationIndex addOperation(std::unique_ptr<Operation> &&node);
void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data);
- void addInput(const OperandIndex &ind);
- void addOutput(const OperandIndex &ind);
+ void addInput(const OperandIndex &ind, const std::string &name = "");
+ void addOutput(const OperandIndex &ind, const std::string &name = "");
void finishBuilding(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; }
OperandIndexSequence &getInputs() { return _inputs; }
const OperandIndexSequence &getOutputs() const { return _outputs; }
OperandIndexSequence &getOutputs() { return _outputs; }
+ IOIndex getInputIndex(const std::string &name) const;
+ IOIndex getOutputIndex(const std::string &name) const;
const Operands &operands() const { return _operands; }
Operands &operands() { return _operands; } // TODO Remove this non-const accessor
const Operations &operations() const { return _operations; }
Operands _operands;
OperandIndexSequence _inputs;
OperandIndexSequence _outputs;
+ std::unordered_map<std::string, IOIndex> _name_to_input;
+ std::unordered_map<std::string, IOIndex> _name_to_output;
// Child subgraphs
std::shared_ptr<Subgraphs> _subgraphs;
// TFLite and circle's default layout is NHWC;
uint32_t horizontal;
};
+struct Dilation
+{
+ uint32_t width_factor;
+ uint32_t height_factor;
+};
+
} // namespace ir
} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_LOWERED_GRAPH_H__
-#define __ONERT_IR_LOWERED_GRAPH_H__
-
-#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
-#include "ir/OpSequences.h"
-#include "compiler/BackendResolver.h"
-#include "compiler/Compiler.h"
-
-namespace onert
-{
-namespace ir
-{
-
-/**
- * @brief Class that contains lowering information on graph.
- * In addition, after lowering, operands in graph will be set to "dynamic"
- * if the shape of output of an operation cannot be decided at compilation time.
- */
-class LoweredGraph
-{
-public:
- LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options);
-
- Graph &graph() { return _graph; }
- const Graph &graph() const { return _graph; }
- const LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
- const operation::LowerInfo *getLowerInfo(const OpSequenceIndex &op_seq_index) const;
- void setLowerInfo(const OpSequenceIndex &op_seq_index,
- std::unique_ptr<operation::LowerInfo> &&lower_info);
- void removeLowerInfo(const OpSequenceIndex &op_seq_index);
- const operand::LowerInfo *getLowerInfo(const OperandIndex &index) const;
- operand::LowerInfo *getLowerInfo(const OperandIndex &index);
- void setLowerInfo(const OperandIndex &index, std::unique_ptr<operand::LowerInfo> &&lower_info);
- void removeLowerInfo(const OperandIndex &index);
- OpSequences &op_seqs() { return _op_seqs; }
- const OpSequences &op_seqs() const { return _op_seqs; }
- void iterateTopolOpSeqs(
- const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const;
- void iterateTopolOpSeqs(const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn);
- const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
- const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
- std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
-
-private:
- void makeOpSequences(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
- const compiler::CompilerOptions &options,
- const compiler::BackendResolver &backend_resolver);
-
- void
- manipulateLowerInfo(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
- bool is_primary);
- void dumpLowerInfo();
- bool mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
- Layout layout, const compiler::BackendResolver &backend_resolver);
- OpSequenceIndex appendFreshSingleOpSequence(const OperationIndex &node_index,
- const Operation &node);
-
-private:
- Graph _graph;
- backend::BackendContexts _backend_contexts;
- std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- LowerInfoMap _lower_info_map;
- // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member
- OpSequences _op_seqs;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_LOWERED_GRAPH_H__
*/
OpSequenceIndex getOperation(const OperationIndex &operation_index) const;
/**
- * @brief Dump OpSequences
- *
- * @param msg Message that will be displayed
- * @param graph Graph that has information used for dump
- */
- void dump(const std::string &msg, const Operations &operations) const;
- /**
* @brief Remove an operation from OpSequence
*
* @param operation_index Operation index to be removed
mutable std::unordered_map<OperationIndex, OpSequenceIndex> _seq_indexes;
};
+/**
+ * @brief Dump OpSequences
+ *
+ * @param op_seqs Operation Sequences
+ * @param operations Operation context
+ */
+void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations);
+
} // namespace ir
} // namespace onert
// This file has no ifdef guard intentionally
#include "ir/operation/BatchToSpaceND.h"
+#include "ir/operation/BinaryArithmetic.h"
#include "ir/operation/BroadcastTo.h"
#include "ir/operation/Conv2D.h"
-#include "ir/operation/MaxPool2D.h"
-#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Concat.h"
#include "ir/operation/Reshape.h"
#include "ir/operation/Fill.h"
#include "ir/operation/Transpose.h"
#include "ir/operation/Permute.h"
#include "ir/operation/Reduce.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
#include "ir/operation/DepthwiseConv2D.h"
#include "ir/operation/Slice.h"
#include "ir/operation/StridedSlice.h"
-#include "ir/operation/Mul.h"
#include "ir/operation/Squeeze.h"
-#include "ir/operation/Tanh.h"
-#include "ir/operation/Log.h"
-#include "ir/operation/Logistic.h"
-#include "ir/operation/Cast.h"
-#include "ir/operation/Div.h"
-#include "ir/operation/Exp.h"
+#include "ir/operation/ElementwiseActivation.h"
+#include "ir/operation/ElementwiseBinary.h"
+#include "ir/operation/ElementwiseUnary.h"
#include "ir/operation/ExpandDims.h"
#include "ir/operation/Comparison.h"
-#include "ir/operation/LogicalAnd.h"
-#include "ir/operation/LogicalOr.h"
-#include "ir/operation/LogicalNot.h"
#include "ir/operation/LSTM.h"
-#include "ir/operation/RSQRT.h"
-#include "ir/operation/ReLU.h"
#include "ir/operation/ResizeBilinear.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
+#include "ir/operation/ResizeNearestNeighbor.h"
#include "ir/operation/Reverse.h"
#include "ir/operation/RNN.h"
-#include "ir/operation/Round.h"
-#include "ir/operation/Floor.h"
#include "ir/operation/SpaceToBatchND.h"
#include "ir/operation/SpaceToDepth.h"
-#include "ir/operation/L2Pool2D.h"
#include "ir/operation/EmbeddingLookup.h"
#include "ir/operation/L2Normalization.h"
#include "ir/operation/HashtableLookup.h"
#include "ir/operation/InstanceNorm.h"
#include "ir/operation/PReLU.h"
#include "ir/operation/TransposeConv.h"
-#include "ir/operation/SQRT.h"
#include "ir/operation/SquaredDifference.h"
#include "ir/operation/TopKV2.h"
#include "ir/operation/Gather.h"
-#include "ir/operation/Neg.h"
-#include "ir/operation/Abs.h"
#include "ir/operation/ArgMax.h"
-#include "ir/operation/Dequantize.h"
#include "ir/operation/LocalResponseNormalization.h"
#include "ir/operation/DepthToSpace.h"
#include "ir/operation/Pack.h"
#include "ir/operation/SplitV.h"
#include "ir/operation/Unpack.h"
#include "ir/operation/Pad.h"
-#include "ir/operation/Min.h"
-#include "ir/operation/Max.h"
#include "ir/operation/Custom.h"
#include "ir/operation/Einsum.h"
#include "ir/operation/OneHot.h"
-#include "ir/operation/Cos.h"
-#include "ir/operation/Sin.h"
#include "ir/operation/Shape.h"
#include "ir/operation/ConvertFp32ToFp16.h"
#include "ir/operation/ConvertFp16ToFp32.h"
#include "ir/operation/If.h"
#include "ir/operation/While.h"
#include "ir/operation/Pow.h"
-#include "ir/operation/ZerosLike.h"
#include "ir/operation/Tile.h"
#include "ir/operation/Range.h"
+#include "ir/operation/Rank.h"
#include "ir/operation/BCQFullyConnected.h"
#include "ir/operation/BCQGather.h"
#include "ir/operation/MatrixBandPart.h"
#include "ir/operation/BatchMatMul.h"
#include "ir/operation/FusedBatchNorm.h"
#include "ir/operation/LogSoftmax.h"
-#include "ir/operation/Quantize.h"
#include "ir/operation/StatelessRandomUniform.h"
#endif
// Internal Name
-OP(Add)
-OP(Sub)
OP(BatchToSpaceND)
+OP(BinaryArithmetic)
OP(BroadcastTo)
-OP(Cast)
OP(Conv2D)
OP(DepthwiseConv2D)
-OP(AvgPool2D)
-OP(MaxPool2D)
+OP(Pool2D)
OP(Concat)
OP(Fill)
OP(FullyConnected)
OP(Reduce)
OP(Reshape)
-OP(Mul)
OP(Softmax)
OP(Squeeze)
OP(Slice)
OP(StridedSlice)
-OP(Tanh)
-OP(Logistic)
-OP(Div)
OP(Transpose)
-OP(Exp)
+OP(ElementwiseActivation)
+OP(ElementwiseBinary)
+OP(ElementwiseUnary)
OP(ExpandDims)
OP(Comparison)
-OP(LogicalAnd)
-OP(LogicalOr)
-OP(LogicalNot)
OP(LSTM)
-OP(RSQRT)
-OP(ReLU)
OP(ResizeBilinear)
-OP(ReLU1)
-OP(ReLU6)
+OP(ResizeNearestNeighbor)
OP(Reverse)
OP(RNN)
-OP(Round)
-OP(Floor)
OP(SpaceToBatchND)
OP(SpaceToDepth)
-OP(L2Pool2D)
OP(EmbeddingLookup)
OP(L2Normalization)
OP(HashtableLookup)
OP(InstanceNorm)
OP(PReLU)
OP(TransposeConv)
-OP(SQRT)
OP(SquaredDifference)
OP(TopKV2)
OP(Gather)
-OP(Neg)
-OP(Abs)
OP(ArgMax)
-OP(Dequantize)
OP(Einsum)
OP(LocalResponseNormalization)
OP(DepthToSpace)
OP(Pad)
OP(Custom)
OP(Permute)
-OP(Min)
-OP(Max)
OP(OneHot)
-OP(Cos)
-OP(Sin)
OP(Shape)
OP(ConvertFp32ToFp16)
OP(ConvertFp16ToFp32)
OP(If)
OP(While)
-OP(Log)
OP(Pow)
-OP(ZerosLike)
OP(Tile)
OP(Range)
+OP(Rank)
OP(BCQFullyConnected)
OP(BCQGather)
OP(MatrixBandPart)
OP(BatchMatMul)
OP(FusedBatchNorm)
OP(LogSoftmax)
-OP(Quantize)
OP(StatelessRandomUniform)
// TODO Change to Padding struct's method
const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
const FeatureShape &ofm_shape, const Stride &stride,
- uint32_t kw, uint32_t kh);
+ uint32_t kw, uint32_t kh, uint32_t dwf = 1,
+ uint32_t dhf = 1);
} // namespace ir
} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ABS_H__
-#define __ONERT_IR_OPERATION_ABS_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Abs : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Abs; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ABS_H__
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_ADD_H__
-#define __ONERT_IR_OPERATION_ADD_H__
+#ifndef __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
+#define __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
#include "ir/Operation.h"
#include "ir/InternalType.h"
namespace operation
{
-class Add : public Operation
+class BinaryArithmetic final : public Operation
{
public:
enum Input
RHS
};
+ enum class ArithmeticType
+ {
+ ADD,
+ SUB,
+ MUL,
+ DIV
+ };
+
struct Param
{
+ ArithmeticType arithmetic_type;
Activation activation;
};
public:
- Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+ BinaryArithmetic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Add; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::BinaryArithmetic; }
public:
const Param &param() const { return _param; }
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_ADD_H__
+#endif // __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cast; }
+ OpCode opcode() const final { return OpCode::BroadcastTo; }
};
} // namespace operation
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_CAST_H__
-#define __ONERT_IR_OPERATION_CAST_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Cast : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cast; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_CAST_H__
Stride stride;
Padding padding;
Activation activation;
+ Dilation dilation;
};
public:
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_DEQUANTIZE_H__
-#define __ONERT_IR_OPERATION_DEQUANTIZE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Dequantize : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Dequantize; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_DEQUANTIZE_H__
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Add; }
+ OpCode opcode() const final { return OpCode::Einsum; }
public:
const Param &param() const { return _param; }
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_DIV_H__
-#define __ONERT_IR_OPERATION_DIV_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
#include "ir/Operation.h"
-#include "ir/InternalType.h"
namespace onert
{
namespace operation
{
-class Div : public Operation
+class ElementwiseActivation : public Operation
{
public:
enum Input
{
- LHS = 0,
- RHS
+ INPUT = 0
+ };
+
+ enum class Type
+ {
+ ELU,
+ LOGISTIC,
+ RELU,
+ TANH,
+ LEAKY_RELU
};
struct Param
{
- Activation activation;
+ Type op_type;
+ float alpha;
+ float beta;
+ Param() : op_type(Type::ELU), alpha(0.0f), beta(0.0f) {}
};
public:
- Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+ ElementwiseActivation(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Div; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::ElementwiseActivation; }
public:
const Param &param() const { return _param; }
+public:
+ static float infinity;
+
private:
Param _param;
};
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_DIV_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_MUL_H__
-#define __ONERT_IR_OPERATION_MUL_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
#include "ir/Operation.h"
-#include "ir/InternalType.h"
namespace onert
{
namespace operation
{
-class Mul : public Operation
+class ElementwiseBinary : public Operation
{
public:
enum Input
RHS
};
+ enum class ElementwiseBinaryType
+ {
+ LOGICAL_AND,
+ LOGICAL_OR,
+ MAX,
+ MIN
+ };
+
struct Param
{
- Activation activation;
+ ElementwiseBinaryType op_type;
};
public:
- Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+ ElementwiseBinary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Mul; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::ElementwiseBinary; }
public:
const Param &param() const { return _param; }
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_MUL_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_MAXPOOL2D_H__
-#define __ONERT_IR_OPERATION_MAXPOOL2D_H__
-
-#include <memory>
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
#include "ir/Operation.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
namespace onert
{
namespace operation
{
-class MaxPool2D : public Operation
+class ElementwiseUnary : public Operation
{
public:
enum Input
INPUT = 0
};
+ enum class Type
+ {
+ ABS,
+ CAST,
+ COS,
+ DEQUANTIZE,
+ ERF,
+ EXP,
+ FLOOR,
+ LOG,
+ LOGICAL_NOT,
+ NEG,
+ QUANTIZE,
+ ROUND,
+ RSQRT,
+ SIN,
+ SQRT,
+ SQURE,
+ ZEROS_LIKE
+ };
+
struct Param
{
- uint32_t kh;
- uint32_t kw;
- Stride stride;
- Padding padding;
- Activation activation;
+ Type op_type;
};
public:
- MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ ElementwiseUnary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::MaxPool2D; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::ElementwiseUnary; }
public:
const Param &param() const { return _param; }
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_MAXPOOL2D_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_EXP_H__
-#define __ONERT_IR_OPERATION_EXP_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Exp : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Exp; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_EXP_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_FLOOR_H__
-#define __ONERT_IR_OPERATION_FLOOR_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Floor : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Floor; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_FLOOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOG_H__
-#define __ONERT_IR_OPERATION_LOG_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Log : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Log; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOG_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_AND_H__
-#define __ONERT_IR_OPERATION_LOGICAL_AND_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalAnd : public Operation
-{
-public:
- enum Input
- {
- INPUT0 = 0,
- INPUT1 = 1,
- };
-
-public:
- LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalAnd; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_AND_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_NOT_H__
-#define __ONERT_IR_OPERATION_LOGICAL_NOT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalNot : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- };
-
-public:
- LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalNot; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_NOT_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_OR_H__
-#define __ONERT_IR_OPERATION_LOGICAL_OR_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalOr : public Operation
-{
-public:
- enum Input
- {
- INPUT0 = 0,
- INPUT1 = 1,
- };
-
-public:
- LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalOr; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_OR_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGISTIC_H__
-#define __ONERT_IR_OPERATION_LOGISTIC_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Logistic : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Logistic; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGISTIC_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MAX_H__
-#define __ONERT_IR_OPERATION_MAX_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Max : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
-public:
- Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Max; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MAX_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MEAN_H__
-#define __ONERT_IR_OPERATION_MEAN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Mean : public Operation
-{
-public:
- enum Input
- {
- INPUT,
- AXES
- };
-
- struct Param
- {
- bool keep_dims;
- };
-
-public:
- Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Mean; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MEAN_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MIN_H__
-#define __ONERT_IR_OPERATION_MIN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Min : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
-public:
- Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Min; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MIN_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_NEG_H__
-#define __ONERT_IR_OPERATION_NEG_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Neg : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Neg; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_NEG_H__
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_AVGPOOL2D_H__
-#define __ONERT_IR_OPERATION_AVGPOOL2D_H__
+#ifndef __ONERT_IR_OPERATION_POOL2D_H__
+#define __ONERT_IR_OPERATION_POOL2D_H__
#include <memory>
namespace operation
{
-class AvgPool2D : public Operation
+class Pool2D : public Operation
{
public:
enum Input
INPUT = 0
};
+ enum class PoolType
+ {
+ AVG,
+ L2,
+ MAX,
+ };
+
struct Param
{
+ PoolType op_type;
uint32_t kh;
uint32_t kw;
-
Stride stride;
Padding padding;
Activation activation;
};
public:
- AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::AvgPool2D; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::Pool2D; }
public:
const Param &param() const { return _param; }
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_AVGPOOL2D_H__
+#endif // __ONERT_IR_OPERATION_POOL2D_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_QUANTIZE_H__
-#define __ONERT_IR_OPERATION_QUANTIZE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Quantize : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- };
-
-public:
- Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Quantize; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_QUANTIZE_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_RSQRT_H__
-#define __ONERT_IR_OPERATION_RSQRT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class RSQRT : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::RSQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_RSQRT_H__
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_COS_H__
-#define __ONERT_IR_OPERATION_COS_H__
+#ifndef __ONERT_IR_OPERATION_RANK_H__
+#define __ONERT_IR_OPERATION_RANK_H__
+
+#include <memory>
#include "ir/Operation.h"
namespace operation
{
-class Cos : public Operation
+class Rank : public Operation
{
public:
enum Input
};
public:
- Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+ Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cos; }
+ OpCode opcode() const final { return OpCode::Rank; }
};
} // namespace operation
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_COS_H__
+#endif // __ONERT_IR_OPERATION_RANK_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_RELU_H__
-#define __ONERT_IR_OPERATION_RELU_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_RELU_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ReLU1_H__
-#define __ONERT_IR_OPERATION_ReLU1_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU1 : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU1; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ReLU1_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ReLU6_H__
-#define __ONERT_IR_OPERATION_ReLU6_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU6 : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU6; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ReLU6_H__
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_L2_POOL_2D_H__
-#define __ONERT_IR_OPERATION_L2_POOL_2D_H__
+#ifndef __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
+#define __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
#include <memory>
#include "ir/Operation.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
namespace onert
{
namespace operation
{
-class L2Pool2D : public Operation
+class ResizeNearestNeighbor : public Operation
{
public:
enum Input
struct Param
{
- Padding padding;
- Stride stride;
- uint32_t kw;
- uint32_t kh;
- Activation activation;
+ int32_t height_out;
+ int32_t width_out;
+ bool align_corners;
};
public:
- L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ ResizeNearestNeighbor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::L2Pool2D; }
+ OpCode opcode() const final { return OpCode::ResizeNearestNeighbor; }
public:
 const Param &param() const { return _param; }
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_L2_POOL_2D_H__
+#endif // __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ROUND_H__
-#define __ONERT_IR_OPERATION_ROUND_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Round : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Round; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ROUND_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SQRT_H__
-#define __ONERT_IR_OPERATION_SQRT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class SQRT : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::SQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SQRT_H__
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Abs; }
+ OpCode opcode() const final { return OpCode::Select; }
};
} // namespace operation
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SIN_H__
-#define __ONERT_IR_OPERATION_SIN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Sin : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Sin; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SIN_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SUB_H__
-#define __ONERT_IR_OPERATION_SUB_H__
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Sub : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
- struct Param
- {
- Activation activation;
- };
-
-public:
- Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Sub; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SUB_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_TANH_H__
-#define __ONERT_IR_OPERATION_TANH_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Tanh : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Tanh; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_TANH_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ZEROS_LIKE_H__
-#define __ONERT_IR_OPERATION_ZEROS_LIKE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ZerosLike : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ZerosLike; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ZEROS_LIKE_H__
// Name | Type | Default
CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon")
+CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
CONFIG(DISABLE_COMPILE , bool , "0")
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
+#ifndef __ONERT_UTIL_ONERTEXCEPTION_H__
+#define __ONERT_UTIL_ONERTEXCEPTION_H__
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
+#include <string>
namespace onert
{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class ExpLayer : public ::onert::exec::IFunction
+class OnertException : public std::exception
{
public:
- ExpLayer();
-
-public:
- void expFloat32();
+ OnertException(const std::string &msg) : _msg{msg} {}
+ OnertException(const std::string &tag, const std::string &msg) : _msg{tag + " : " + msg} {}
- void expQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
+ const char *what() const noexcept override { return _msg.c_str(); }
private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
+ std::string _msg;
+};
+
+class InsufficientBufferSizeException : public OnertException
+{
+public:
+ InsufficientBufferSizeException(const std::string &msg)
+ : OnertException{"InsufficientBufferSize", msg}
+ {
+ }
};
-} // namespace ops
-} // namespace cpu
-} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
+#endif // __ONERT_UTIL_ONERTEXCEPTION_H__
#include "Utils.h"
-#include "ir/operation/AvgPool2D.h"
#include "ir/operation/Concat.h"
-#include "ir/operation/MaxPool2D.h"
#include "ir/operation/Conv2D.h"
#include "ir/operation/DepthwiseConv2D.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Reshape.h"
-#include "ir/operation/RSQRT.h"
#include "ir/operation/StridedSlice.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include "ir/Index.h"
#include "ir/Layout.h"
#include "ir/OperationVisitor.h"
ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
-ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
 const ir::operation::BatchMatMul::Param &param);
ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis,
int rank);
-ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
-
ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis);
ir::Shape inferPackShape(const ir::Shape &input_shape, int axis, int rank, int num);
ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const size_t num_pads);
+ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
+ ir::Layout layout = ir::Layout::NHWC);
+
template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val);
ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_elements,
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "TensorBuilder.h"
+#include "Tensor.h"
#include <backend/Backend.h>
// there is no such case until now, let's support it later
// TODO Remove TensorBuilder and ConstantInitializer
// TODO Support Consecutive controflow operation's intermediate tensor
- auto tb = std::make_shared<TensorBuilder>();
+ auto tr = std::make_shared<TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr);
context->tensor_register = nullptr;
context->optimizer = nullptr;
return context;
#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-#include "TensorBuilder.h"
+#include "TensorRegistry.h"
#include <backend/IConstantInitializer.h>
#include <ir/Operands.h>
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{
}
private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+ std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
};
} // namespace controlflow
#include "DynamicTensorManager.h"
#include "util/logging.h"
+#include "util/Exceptions.h"
+#include "ir/DataType.h"
namespace onert
{
namespace controlflow
{
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- const std::shared_ptr<UserTensorRegistry> &user_reg)
- : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{reg},
- _user_tensors{user_reg}
+DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors)
+ : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{tensors}
{
// DO NOTHING
}
void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
{
// NOTE Handle user tensors first
- auto user_tensor = _user_tensors->getNativeTensor(ind);
+ auto user_tensor = _tensors->getNativeUserTensor(ind);
if (user_tensor)
{
// User tensors cannot be reallocated.
auto buffer_size = user_tensor->total_size();
auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type());
if (buffer_size < new_size)
- throw std::runtime_error{"ExecutorBase: output buffer size is less than output tensor size"};
+ throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"};
user_tensor->setShape(new_shape);
return;
}
- // NOTE Then handle native tensors
- auto tensor = _tensors->getNativeTensor(ind);
+ // NOTE Then handle own tensors
+ auto tensor = _tensors->getNativeOwnTensor(ind);
assert(tensor);
bool previously_dynamic = tensor->is_dynamic();
const ir::OperandInfo &tensor_info,
ir::Layout backend_layout)
{
- assert(_tensors->getNativeTensor(ind) == nullptr);
auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this);
- _tensors->setNativeTensor(ind, tensor);
+ _tensors->setNativeOwnTensor(ind, tensor);
}
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find != _dealloc_tensor_map.end())
- {
- auto &input_set = find->second;
- input_set.emplace(operand_ind);
- }
- else
- {
- _dealloc_tensor_map.emplace(
- std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind}));
- }
+ _dealloc_tensor_map[op_ind].emplace(operand_ind);
}
void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
#ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-#include "UserTensorRegistry.h"
+#include "TensorRegistry.h"
+#include "Tensor.h"
#include <backend/IDynamicTensorManager.h>
#include <backend/cpu_common/MemoryManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
#include <ir/OperandInfo.h>
#include <ir/Operation.h>
#include <ir/Index.h>
namespace controlflow
{
-// TODO Find optimized algorithm to manage memory.
-
/**
* @brief Class to manage dynamic tensor and its memory
*/
class DynamicTensorManager : public backend::IDynamicTensorManager
{
public:
- DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- const std::shared_ptr<UserTensorRegistry> &user_reg);
+ DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors);
virtual ~DynamicTensorManager() = default;
* @todo DynamicMemoryManager is not optimized. Optimized one is needed
*/
std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr;
- // TODO Refactoring : Merge two TensorRegistries into one
- const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
- const std::shared_ptr<UserTensorRegistry> _user_tensors;
+ const std::shared_ptr<TensorRegistry> _tensors;
// contains list of dynamic tensor index, which can be deallocated after running operation
// note: this map could contain static tensor index too. Careful use is required.
namespace controlflow
{
-KernelGenerator::KernelGenerator(const ir::Graph &graph,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _graph{graph}, _tensor_builder{tensor_builder}, _tensor_builder_set{}, _executor_map{nullptr}
+KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
+ _tensor_registries{}, _executor_map{nullptr}
{
UNUSED_RELEASE(_graph);
- UNUSED_RELEASE(_tensor_builder_set);
+ UNUSED_RELEASE(_tensor_registries);
UNUSED_RELEASE(_executor_map);
}
void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
assert(!_return_fn_seq);
- assert(_tensor_builder->dynamicTensorManager());
- assert(_tensor_builder->tensorRegistry());
+ assert(_dyn_tensor_manager);
+ assert(_tensor_reg);
- auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
- auto dyn_shape_inferer = std::make_unique<exec::DynamicShapeInferer>(
- _graph.operands(), dyn_tensor_manager, _tensor_builder->tensorRegistry());
+ auto dyn_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
dyn_ctx->op_seq = &op_seq;
dyn_ctx->operations = &_graph.operations();
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+ dyn_ctx->tensor_registry = _tensor_reg;
+ dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager;
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
}
auto output_tensor = getTensor(output_index);
output_tensors.emplace_back(output_tensor);
- const auto output_tensor_builder = getTensorBuilder(output_index);
- if (output_tensor_builder->supportDynamicTensor())
- {
- auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
- }
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
}
// IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)};
std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)};
std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
- const auto output_tensor_builder = getTensorBuilder(output_index);
- VERBOSE(PERMUTE_FIND_TB) << output_index << " -> " << output_tensor_builder.get() << std::endl;
- assert(output_tensor_builder != nullptr);
- if (output_tensor_builder->supportDynamicTensor())
- {
- outputs_dyn_alloc_info[output_tensors.at(0)] =
- exec::DynAllocInfo{output_index, output_tensor_builder->dynamicTensorManager()};
- }
+ outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index};
auto fn =
std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info);
output_tensors.emplace_back(output_tensor);
- const auto output_tensor_builder = getTensorBuilder(output_index);
- if (output_tensor_builder->supportDynamicTensor())
- {
- auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
- }
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
}
// WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index)
{
- std::shared_ptr<backend::ITensor> ret;
- for (auto tensor_builder : _tensor_builder_set)
- {
- auto tensor = tensor_builder->tensorAt(index);
- if (tensor)
- {
- ret = tensor;
- break;
- }
- }
- assert(ret != nullptr);
- return ret;
-}
-
-std::shared_ptr<backend::ITensorBuilder>
-KernelGenerator::getTensorBuilder(const ir::OperandIndex &index)
-{
- std::shared_ptr<backend::ITensorBuilder> ret;
- for (auto tensor_builder : _tensor_builder_set)
- {
- auto reg = tensor_builder->tensorRegistry();
- auto tensor = reg ? reg->getNativeITensor(index) : tensor_builder->tensorAt(index);
- if (tensor)
- {
- ret = tensor_builder;
- break;
- }
- }
+ std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index);
assert(ret != nullptr);
return ret;
}
#include <exec/IExecutor.h>
#include <ir/Graph.h>
#include "TensorBuilder.h"
-#include "compiler/TensorBuilders.h"
-
-#include "compiler/TensorBuilders.h"
+#include "compiler/TensorRegistries.h"
+#include "TensorRegistry.h"
namespace onert
{
class KernelGenerator : public IKernelGenerator
{
public:
- KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder);
+ KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg);
- void setTensorBuilderSet(const compiler::TensorBuilders &tensor_builder_set)
+ void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
{
- _tensor_builder_set = tensor_builder_set;
+ _tensor_registries = tensor_registries;
}
void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
private:
std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index);
- std::shared_ptr<backend::ITensorBuilder> getTensorBuilder(const ir::OperandIndex &index);
private:
const ir::Graph &_graph;
- std::shared_ptr<TensorBuilder> _tensor_builder;
- compiler::TensorBuilders _tensor_builder_set;
+ IDynamicTensorManager *_dyn_tensor_manager;
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ compiler::TensorRegistries _tensor_registries;
exec::ExecutorMap *_executor_map;
};
* limitations under the License.
*/
-#include "ir/operation/Log.h"
+#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
+#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
+#include <backend/cpu_common/Tensor.h>
namespace onert
{
-namespace ir
+namespace backend
{
-namespace operation
+namespace controlflow
{
-void Log::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Log::Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
+using Tensor = cpu_common::Tensor;
-} // namespace operation
-} // namespace ir
+} // namespace controlflow
+} // namespace backend
} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
namespace controlflow
{
-TensorBuilder::TensorBuilder()
- : _tensor_reg{new cpu_common::TensorRegistry()}, _user_tensor_reg{new UserTensorRegistry()},
- _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)},
- _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg, _user_tensor_reg)}
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{
+ new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
{
/* empty */
}
void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
const auto tensor_info = _tensor_info_map.at(ind);
- if (!at(ind)->is_dynamic())
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
{
const auto size = tensor_info.total_size();
_static_tensor_mgr->claimPlan(ind, size);
void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
{
- if (!at(ind)->is_dynamic())
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
{
_static_tensor_mgr->releasePlan(ind);
}
bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
{
+ // User tensors are not registered in _tensor_info_map but objects for them are exist
+ // in the tensor registry.
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_reg->getITensor(ind))
+ return true;
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
- // NOTE Find from User Tensor Registry first
- // FIXME There may be both user tensor and native tensor for a `ind` which is a waste
- auto user_tensor = _user_tensor_reg->getITensor(ind);
- auto tensor = _tensor_reg->getITensor(ind);
- if (user_tensor)
- {
- return user_tensor;
- }
- else
- return tensor;
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
+std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
{
- return _tensor_reg->getNativeTensor(ind);
+ return _tensor_reg->getNativeOwnTensor(ind);
}
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
return std::move(_dynamic_tensor_mgr);
}
-void TensorBuilder::setUserTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<UserTensor> &tensor)
+void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<UserTensor> &tensor)
{
- _user_tensor_reg->setNativeTensor(ind, tensor);
+ _tensor_reg->setNativeUserTensor(ind, tensor);
}
} // namespace controlflow
class TensorBuilder : public ITensorBuilder
{
public:
- TensorBuilder();
-
- bool supportDynamicTensor() override { return true; }
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
/**
* @brief Register tensor information to allocate on CPU backend
void allocate() override;
void postFunctionPrepare() override { /* DO NOTHING */}
- /**
- * @brief Get tensor with a specific OperandIndex
- *
- * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise.
- */
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
* If not, program will crash with assert or exception.
* @return shared_ptr<operand::Tensor>
*/
- std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind);
- void setUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
-
- std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
+ std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind);
+ void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- const std::shared_ptr<UserTensorRegistry> _user_tensor_reg;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+
+#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+#include "UserTensor.h"
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+/**
+ * @brief Tensor registry class for controlflow backend
+ *
+ * This class contains three types of tensors. Two native tensors(tensors that are managed by this
+ * backend) and the other is migrant tensor.
+ *
+ * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given
+ * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
+ * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg )
+ *
+ * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager
+ *
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+ TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {}
+
+ std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getNativeITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getPortableTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getNativeTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind)
+ {
+ return _base_reg->getNativeTensor(ind);
+ }
+
+ std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind)
+ {
+ auto tensor = _native_user_tensors.find(ind);
+ if (tensor != _native_user_tensors.end())
+ return tensor->second;
+ return nullptr;
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor) override
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setMigrantTensor(ind, tensor);
+ return true;
+ }
+
+ void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setNativeTensor(ind, tensor);
+ }
+
+ void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _native_user_tensors[ind] = tensor;
+ }
+
+ const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors()
+ {
+ return _native_user_tensors;
+ }
+ std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
+
+private:
+ std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
+ ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors;
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
try
{
const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind;
- _dst_dyn_alloc_info_map.at(dst_tensor).dyn_tensor_manager->applyShape(dst_index, new_shape);
+ auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager();
+ if (!dyn_tensor_manager)
+ throw std::runtime_error{
+ "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
+ dyn_tensor_manager->applyShape(dst_index, new_shape);
assert(dst_tensor->buffer() != nullptr);
}
catch (const std::out_of_range &e)
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find != _dealloc_tensor_map.end())
- {
- auto &input_set = find->second;
- input_set.emplace(operand_ind);
- }
- else
- {
- _dealloc_tensor_map.emplace(
- std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind}));
- }
+ // unordered_map::operator[] default-constructs an empty set for a new
+ // op_ind, so a single emplace handles both found and not-found cases.
+ _dealloc_tensor_map[op_ind].emplace(operand_ind);
}
void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
namespace cpu_common
{
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®)
- : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
+ IDynamicTensorManager *dynamic_tensor_manager)
+ : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
{
// DO NOTHING
}
bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, nullptr);
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
_tensors->setNativeTensor(ind, tensor);
_as_constants[ind] = as_const;
}
backend::controlflow::Config::ID;
}
+ // FIXME This is a workaround for bcq operations, should remove it
+ {
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ }
+
{
VERBOSE(Compiler) << std::boolalpha;
VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl;
auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
// Lower: Assign backend
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> lowered_subgs;
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
_subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
_options.is_primary_subgraph = (index == ir::SubgraphIndex{0});
onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
// Lower: Assign backend
- lowered_subgs[index] = std::make_unique<ir::LoweredGraph>(subg, _options);
+ lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
// Check backend(s) for subgraph support FP16
bool backends_support_fp16 = true;
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
+#include "compiler/TensorBuilders.h"
#include "backend/IConstantInitializer.h"
#include "backend/IKernelGenerator.h"
#include "backend/IOptimizer.h"
std::shared_ptr<backend::IConfig> _config;
};
+// TODO Think of a better way to manage TensorManagers
+// Collects the static and dynamic tensor managers released by each
+// tensor builder into one TensorManagerSet. Ownership is transferred to
+// the returned set; builders that yield nullptr for a kind are skipped.
+backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders)
+{
+ backend::TensorManagerSet tensor_mgrs;
+ for (auto &tensor_builder : tensor_builders)
+ {
+ // A builder may provide either, both, or neither manager kind.
+ auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
+ if (s_tensor_manager != nullptr)
+ tensor_mgrs.insert(std::move(s_tensor_manager));
+
+ auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
+ if (d_tensor_manager != nullptr)
+ tensor_mgrs.insert(std::move(d_tensor_manager));
+ }
+ return tensor_mgrs;
+}
+
} // namespace
} // namespace onert
std::placeholders::_3, true);
}
-exec::IExecutor *ExecutorFactory::create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
}
-void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph)
+void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
{
struct Entry
{
}
}
-void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
+void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
const std::vector<ir::OpSequenceIndex> &order)
{
for (const auto index : order)
const auto backend = lowered_graph->getLowerInfo(index)->backend();
const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+ auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
+
if (tensor_register)
{
// Custom registration
const auto &op = lowered_graph->graph().operations().at(op_idx);
for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- if (!tensor_builder->isRegistered(index))
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
{
const auto &operand_lower_info =
lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
}
std::vector<std::shared_ptr<backend::ITensor>>
-ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices)
{
std::vector<std::shared_ptr<backend::ITensor>> ret;
- TensorBuilders tensor_builders{lowered_graph.backend_contexts(), false};
- std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder =
- tensor_builders.getControlflowTensorBuilder();
+ // TODO Store controlflow backend in BackendContext
+ std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
+ std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
+ for (const auto &e : lowered_graph.backend_contexts())
+ {
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::controlflow::Config::ID)
+ {
+ cf_tensor_builder =
+ std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
+ cf_tensor_reg =
+ std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ }
+ }
assert(cf_tensor_builder);
+ assert(cf_tensor_reg);
for (auto ind : indices)
{
cf_tensor_builder->dynamicTensorManager());
// Add tensor to controlflow TensorRegistry.
- cf_tensor_builder->setUserTensor(ind, tensor);
+ cf_tensor_reg->setNativeUserTensor(ind, tensor);
ret.push_back(tensor);
}
return ret;
}
-void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph,
- TensorBuilders &tensor_builders)
+void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph)
{
+ TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
+
lowered_graph.op_seqs().iterate(
[&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
// If an OpSequence input/output tensor does not have a own tensor object,
// it must be using external tensors, so find the tensor from other tensor builders and
// set the tensor to this tensor builder if portable
- if (!backend_ctx->tensor_builder->tensorAt(ind))
+ if (!backend_ctx->tensor_registry->getITensor(ind))
{
- auto tensor = tensor_builders.getITensor(ind);
- assert(tensor); // The tensor must have been created in one of TensorBuilders
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
if (ptensor)
- backend_ctx->tensor_builder->setMigrantTensor(ind, ptensor);
+ backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
}
}
});
}
exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
Linear::planTensors(*lowered_graph, order);
TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
for (auto &tensor_builder : tensor_builders)
{
tensor_builder->prepare();
}
- prepareExternalTensors(*lowered_graph, tensor_builders);
+ prepareExternalTensors(*lowered_graph);
ExecutionBuilder builder;
auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
if (cf_kernel_gen != nullptr)
{
- cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+ cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
auto fn_seq = kernel_gen->generate(op_seq);
});
}
- auto exec =
- new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_builders, std::move(code_map), order};
+ backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+ auto exec = new exec::LinearExecutor{
+ std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map), order};
if (!options.trace_filepath.empty())
{
}
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<ir::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
{
const auto &backend_contexts = lowered_graph->backend_contexts();
}
TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
// To make tensors never be deallocated, this is a workaround to use static memory planner
for (auto &tensor_builder : tensor_builders)
tensor_builder->prepare();
}
- prepareExternalTensors(*lowered_graph, tensor_builders);
+ prepareExternalTensors(*lowered_graph);
ExecutionBuilder builder;
if (cf_kernel_gen != nullptr)
{
assert(cf_kernel_gen != nullptr);
- cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+ cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
auto fn_seq = kernel_gen->generate(op_seq);
});
}
+ backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+
exec::ExecutorBase *exec = nullptr;
if (parallel)
{
- exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_builders, std::move(code_map)};
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
+ output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map)};
}
else
{
- auto dataflow_exec =
- new exec::DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_builders, std::move(code_map)};
+ auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
+ output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map)};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
#include "backend/ITensor.h"
#include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
-#include "TensorBuilders.h"
+#include "compiler/LoweredGraph.h"
+#include "TensorRegistries.h"
namespace onert
{
static ExecutorFactory &get();
public:
- exec::IExecutor *create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map);
ExecutorFactory();
private:
- static void initializeBackendContext(ir::LoweredGraph *lowered_graph);
- static void runTensorRegistration(ir::LoweredGraph *lowered_graph,
+ static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
+ static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
static std::vector<std::shared_ptr<backend::ITensor>>
- initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+ initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices);
- static void prepareExternalTensors(ir::LoweredGraph &lowered_graph,
- TensorBuilders &tensor_builders);
+ static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph);
static exec::IExecutor *
- createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map);
static exec::IExecutor *
- createDataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);
private:
- std::unordered_map<
- std::string, std::function<exec::IExecutor *(
- std::unique_ptr<ir::LoweredGraph>, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+ std::unordered_map<std::string, std::function<exec::IExecutor *(
+ std::unique_ptr<compiler::LoweredGraph>,
+ const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
_map;
};
namespace compiler
{
-Fp32ToFp16Converter::Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph)
+Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
: _lowered_graph{lowered_graph}
{
VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
#ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
#define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
class Fp32ToFp16Converter
{
public:
- Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph);
+ Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph);
public:
void run();
void convertOperandsOfOpSequence(ir::OpSequence &op_seq);
private:
- ir::LoweredGraph &_lowered_graph;
+ compiler::LoweredGraph &_lowered_graph;
OpSeqIndexList _list_fp32_to_fp16;
OpSeqIndexList _list_fp16_to_fp32;
};
return false;
}
-static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
- const ir::Operation &node, bool quant)
+static bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &,
+ bool)
{
- /* TODO: this is workaround, come up with better solution if have.
- Adding exception in stage doesn't help. Because if there is a record for add without
- broadcast, scheduling will select it since it doesn't distinguish broadcast and
- non-broadcast like it does for quant non-quantized*/
- if (backend->config()->id() == "cpu" &&
- (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
- node.opcode() == ir::OpCode::Mul))
- {
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
- /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
- * without broadcasting*/
- if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
- {
- return true;
- }
- }
- /* TODO: this is workaround, come up with better solution if have.
- Adding exception in stage doesn't help. Because if there is a record for Mul without
- broadcast, scheduling will select it since it doesn't distinguish broadcast and
- non-broadcast like it does for quant non-quantized*/
- else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
- {
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- // Nontrivial broadcasting isn't supported yet
- if (quant ||
- !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
- {
- return true;
- }
- }
+ // Now, there is no workaround
return false;
}
{
for (auto &entry : backend_contexts)
{
+ if (entry.first->config()->id() == backend::controlflow::Config::ID)
+ continue;
_all_backends.push_back(entry.first);
}
_backend_resolver = std::make_unique<compiler::BackendResolver>();
namespace compiler
{
-std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lowered_graph)
+std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
{
std::vector<ir::OpSequenceIndex> order;
lowered_graph.iterateTopolOpSeqs(
return order;
}
-void Linear::dump(const ir::LoweredGraph &lowered_graph,
+void Linear::dump(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order)
{
{
}
}
-void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
+void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order)
{
const auto &graph = lowered_graph.graph();
tensor_builder_map[ind]->notifyLastUse(ind);
// plan for deallocation of dynamic tensor
- if (tensor_builder_map[ind]->supportDynamicTensor())
- {
- assert(tensor_builder_map[ind]->dynamicTensorManager());
- tensor_builder_map[ind]->dynamicTensorManager()->planDealloc(op_idx, ind);
- }
+ auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
+ if (dyn_tensor_manager)
+ dyn_tensor_manager->planDealloc(op_idx, ind);
}
}
}
#include "ir/OpSequences.h"
#include "ir/Index.h"
#include "backend/ITensorBuilder.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
class Linear
{
public:
- static std::vector<ir::OpSequenceIndex> linearize(const ir::LoweredGraph &lowered_graph);
- static void dump(const ir::LoweredGraph &lowered_graph,
+ static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
+ static void dump(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
- static void planTensors(const ir::LoweredGraph &lowered_graph,
+ static void planTensors(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
};
* limitations under the License.
*/
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include <assert.h>
#include <sstream>
#include "util/logging.h"
-#include "pass/ConstantInsertionPass.h"
-#include "pass/ConstantLoweringPass.h"
-#include "pass/PermutationOperationPass.h"
-#include "pass/PermutationInsertionPass.h"
-#include "pass/PermutationEliminationPass.h"
+#include "compiler/pass/ConstantInsertionPass.h"
+#include "compiler/pass/ConstantLoweringPass.h"
+#include "compiler/pass/PermutationOperationPass.h"
+#include "compiler/pass/PermutationInsertionPass.h"
+#include "compiler/pass/PermutationEliminationPass.h"
#include "ir/GraphIterator.h"
-#include "verifier/Verifier.h"
+#include "ir/verifier/Verifier.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
#include "compiler/BackendResolver.h"
namespace onert
{
-namespace ir
+namespace compiler
{
-LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options)
- : _graph{graph}
+LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
bool linear_executor = (options.executor == "Linear");
// Build backend contexts
- auto &backend_manager = compiler::BackendManager::get();
+ auto &backend_manager = BackendManager::get();
// Always create Controlflow backend context
auto cf_backend = backend_manager.getControlflow();
// TODO Move "schedule" phase out of here
// Schedule
- std::unique_ptr<compiler::BackendResolver> backend_resolver;
+ std::unique_ptr<BackendResolver> backend_resolver;
if (options.he_scheduler)
{
- auto scheduler = compiler::HEScheduler(_backend_contexts, options);
+ auto scheduler = HEScheduler(_backend_contexts, options);
backend_resolver = scheduler.schedule(_graph);
_indexed_ranks = scheduler.getIndexedRanks();
}
else
{
- auto scheduler = compiler::ManualScheduler(_backend_contexts, options);
+ auto scheduler = ManualScheduler(_backend_contexts, options);
backend_resolver = scheduler.schedule(_graph);
}
{
// operand::LowerInfo holder
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operands_lower_info;
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;
- _graph.operands().iterate([&](const OperandIndex &index, const Operand &) {
- operands_lower_info[index] = std::make_unique<operand::LowerInfo>();
+ _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
});
// Make op_seqs while checking whether a node can be merged into a op_seq.
makeOpSequences(operands_lower_info, options, *backend_resolver);
- _op_seqs.iterate([&](const OpSequenceIndex &, OpSequence &op_seq) {
+ _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
assert(op_seq.operations().size() > 0);
std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
});
- _op_seqs.dump("merged and sorted operations without permutation", _graph.operations());
+ VERBOSE(OpSequences) << "dump without permutation" << std::endl;
+ dumpOpSequences(_op_seqs, _graph.operations());
pass::ConstantInsertionPass ci_pass(*this);
ci_pass.run();
pass::PermutationEliminationPass pe_pass(*this);
pe_pass.run();
- _op_seqs.dump("merged and sorted operations with permutation", _graph.operations());
+ VERBOSE(OpSequences) << "dump with permutation" << std::endl;
+ dumpOpSequences(_op_seqs, _graph.operations());
}
// Graph verifications
{
- assert(verifier::DAGChecker().verify(_graph));
- assert(verifier::EdgeConsistencyChecker().verify(_graph));
+ assert(ir::verifier::DAGChecker().verify(_graph));
+ assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
}
}
-const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op_seq_index) const
+const ir::operation::LowerInfo *
+LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
{
auto itr = _lower_info_map.op_seq.find(op_seq_index);
if (itr == _lower_info_map.op_seq.end())
return itr->second.get();
}
-void LoweredGraph::setLowerInfo(const OpSequenceIndex &op_seq_index,
- std::unique_ptr<operation::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+ std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
{
_lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
}
-void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
+void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
{
auto &op_seq_lower_info = _lower_info_map.op_seq;
assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
}
}
-const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index) const
+const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
{
auto itr = _lower_info_map.operand.find(index);
if (itr == _lower_info_map.operand.end())
return itr->second.get();
}
-operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
+ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
{
auto itr = _lower_info_map.operand.find(index);
if (itr == _lower_info_map.operand.end())
return itr->second.get();
}
-void LoweredGraph::setLowerInfo(const OperandIndex &index,
- std::unique_ptr<operand::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
+ std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
{
_lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
}
-void LoweredGraph::removeLowerInfo(const OperandIndex &index)
+void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
{
_lower_info_map.operand.erase(index);
}
void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const
+ const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
{
- // Topological Sorting for OpSequences
- std::vector<OpSequenceIndex> topol_sorted;
- PostDfsIterator<true>{}.iterateOpSeqs(
- *this,
- [&](const OpSequenceIndex &index, const OpSequence &) { topol_sorted.emplace_back(index); });
+ // Topological Sorting for OpSequences
+ std::vector<ir::OpSequenceIndex> topol_sorted;
+ ir::PostDfsIterator<true>{}.iterateOpSeqs(
+ *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
+ topol_sorted.emplace_back(index);
+ });
std::reverse(topol_sorted.begin(), topol_sorted.end());
for (const auto op_seq_idx : topol_sorted)
{
}
void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn)
+ const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
{
- // Topological Sorting for OpSequences
- std::vector<OpSequenceIndex> topol_sorted;
- PostDfsIterator<false>{}.iterateOpSeqs(
- *this, [&](const OpSequenceIndex &index, OpSequence &) { topol_sorted.emplace_back(index); });
+ // Topological Sorting for OpSequences
+ std::vector<ir::OpSequenceIndex> topol_sorted;
+ ir::PostDfsIterator<false>{}.iterateOpSeqs(
+ *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
+ topol_sorted.emplace_back(index);
+ });
std::reverse(topol_sorted.begin(), topol_sorted.end());
for (const auto op_seq_idx : topol_sorted)
{
}
}
-OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &node_index,
- const Operation &node)
+ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+ const ir::Operation &node)
{
// Create a fresh op_seq with one operation, and append it to op_seqs
// Create a fresh op_seq
- auto op_seq = std::make_unique<OpSequence>(_graph.layout());
+ auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());
// Add an operation
op_seq->appendOperation(node_index);
}
void LoweredGraph::makeOpSequences(
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
- const compiler::CompilerOptions &options, const compiler::BackendResolver &backend_resolver)
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ const CompilerOptions &options, const BackendResolver &backend_resolver)
{
// if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
const int op_seq_max_node = options.op_seq_max_node;
assert(op_seq_max_node >= 0);
bool is_profiling = options.he_profiling_mode;
- OpSequence *op_seq = nullptr;
- OpSequenceIndex op_seq_index;
+ ir::OpSequence *op_seq = nullptr;
+ ir::OpSequenceIndex op_seq_index;
// NOTE: The below method appends nodes while making one op_seq if needed. If something better
// ways, happy to update this code.
- PostDfsConstIterator{}.iterate(
- _graph, [&](const OperationIndex &node_index, const Operation &node) {
+ ir::PostDfsConstIterator{}.iterate(
+ _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
// LowerInfo for in/output operands
auto backend = backend_resolver.getBackend(node_index);
for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
{
auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addUsePermuteFactor(operand::PermuteFactor{backend, backend_layout});
+ lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
}
for (auto operand : node.getOutputs())
{
auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addDefPermuteFactor(operand::PermuteFactor{backend, backend_layout});
+ lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
}
bool new_op_seq = (op_seq == nullptr ||
{
auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);
- // OpSequence LowerInfo
+ // OpSequence LowerInfo
setLowerInfo(new_op_seq_index,
- std::make_unique<operation::LowerInfo>(backend, backend_layout));
+ std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));
op_seq_index = new_op_seq_index;
op_seq = &(_op_seqs.at(new_op_seq_index));
}
void LoweredGraph::manipulateLowerInfo(
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, bool is_primary)
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ bool is_primary)
{
- const auto controlflow_backend = compiler::BackendManager::get().getControlflow();
+ const auto controlflow_backend = BackendManager::get().getControlflow();
// TODO Rather than handling primary graph specially,
// let the permute inserted and remove it later
if (is_primary)
{
// TODO Rather than using NHWC Get frontend layout of this node from IR
- auto factor = operand::PermuteFactor{controlflow_backend, Layout::NHWC};
+ auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
{
auto &&lower_info = operands_lower_info.at(index);
else
{
// In case of that an operand is Graph's input and not input or output of any operation
- lower_info->addDefPermuteFactor(operand::PermuteFactor{
+ lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
controlflow_backend,
- Layout::NHWC // TODO Get frontend layout of this node from IR
+ ir::Layout::NHWC // TODO Get frontend layout of this node from IR
});
}
}
if (lower_info->def_factors().size() == 0)
{
// In case of that an operand is Graph's output and not input or output of any operation
- lower_info->addDefPermuteFactor(operand::PermuteFactor{
+ lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
controlflow_backend,
- Layout::NHWC // TODO Get frontend layout of this node from IR
+ ir::Layout::NHWC // TODO Get frontend layout of this node from IR
});
}
}
// Set LowerInfo for each operand from the operand::LowerInfo holder
- _graph.operands().iterate([&](const OperandIndex &index, Operand &) {
+ _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &) {
setLowerInfo(index, std::move(operands_lower_info[index]));
});
}
std::map<uint32_t, std::string> dumps;
- _graph.operands().iterate([&](const OperandIndex &index, Operand &object) {
+ _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
std::stringstream sstream;
if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
{
- auto factors_to_string = [](const operand::PermuteFactorSet &factors) {
+ auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
std::string str;
for (auto factor : factors)
{
return "{ " + str + "}";
};
- auto operation_index_to_string = [](const OperationIndexSet &operations) {
+ auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
std::string str;
for (auto op : operations)
{
sstream << (shape.dim(i)) << " ";
}
sstream << "}" << std::endl;
- sstream << " - Def Operations : " << def_ops << std::endl;
- sstream << " - Use Operations : " << use_ops << std::endl;
+ sstream << " - Def Operations : " << def_ops << std::endl;
+ sstream << " - Use Operations : " << use_ops << std::endl;
sstream << " - Lower Info" << std::endl;
sstream << " - Def Backends : " << def_layouts << std::endl;
sstream << " - Use Backends : " << use_layouts << std::endl;
}
}
-bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
- Layout layout, const compiler::BackendResolver &backend_resolver)
+bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
+ const ir::OperationIndex &node_index, ir::Layout layout,
+ const BackendResolver &backend_resolver)
{
// Are they mergeable?
// 1. the same backend id and layout?
// Branched?
{
- std::unordered_set<OperationIndex> branched_set;
+ std::unordered_set<ir::OperationIndex> branched_set;
// Check for branching up
- for (const auto &input : op_seq.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
const auto &input_obj = _graph.operands().at(input);
auto def = input_obj.getDef();
branched_set.clear();
// Check for branching down
- for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
+ for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED)
{
// TODO Fix this workaround for the case of model outputs that are used by another operation
// This is needed since the branching is decided by operation, but for model outputs,
const auto &node_outputs = node.getOutputs();
// op_seq's operations are in order so that we just check the first and the last
- std::vector<OperationIndex> op_seq_ops{op_seq.operations()[0]};
+ std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
if (op_seq.operations().size() > 1)
op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
return false;
}
-} // namespace ir
+} // namespace compiler
} // namespace onert
const auto &manual_options = _options.manual_scheduler_options;
auto backend_resolver = std::make_unique<compiler::BackendResolver>();
- // This fallback will be used for unavailable backends
+ // This fallback will be used when `backend_for_all` is unavailable
auto fallback = [&]() -> const backend::Backend * {
for (auto backend_id : _options.backend_list)
{
}
return nullptr;
}();
- assert(fallback != nullptr); // There must be at least one fallback
+ if (fallback == nullptr)
+ throw std::runtime_error{"No loaded backends available."};
// 1. Backend for All operations
const backend::Backend *backend_all = resolveBackend(manual_options.backend_for_all, fallback);
const backend::Backend *ManualScheduler::resolveBackend(const std::string &id,
const backend::Backend *fallback)
{
- // Ensure if the backend is available in the backend
+ // Ensure that the backend is available in the current backend context
const backend::Backend *backend = BackendManager::get().get(id);
if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
{
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
-void OperationValidator::visit(const ir::operation::Abs &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
-}
-
void OperationValidator::visit(const ir::operation::BatchMatMul &node)
{
const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
OP_REQUIRES(input_shape.C == output_shape.C);
}
-void OperationValidator::visit(const ir::operation::Cast &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
void OperationValidator::visit(const ir::operation::Comparison &node)
{
const auto output_index{node.getOutputs().at(0)};
OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
}
+void OperationValidator::visit(const ir::operation::Pool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+ OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+}
+
void OperationValidator::visit(const ir::operation::Permute &node)
{
VERBOSE(Permute) << "Configure Permute operation" << std::endl;
num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
}
-void OperationValidator::visit(const ir::operation::Round &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
{
const auto ofm_index{node.getOutputs().at(0)};
OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
}
+void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
+{
+ checkUnaryOp(node);
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseBinary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
+
+ OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
+ OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ OP_REQUIRES(node.getInputs().size() == 1);
+ OP_REQUIRES(node.getOutputs().size() == 1);
+
+ // Check if I/O types match
+ if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE)
+ {
+ OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
+ }
+ else if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
+ {
+ OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+ }
+ else if (node.param().op_type != ir::operation::ElementwiseUnary::Type::CAST)
+ {
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+ }
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
{
const auto output_index{node.getOutputs().at(0)};
}
}
-void OperationValidator::visit(const ir::operation::Exp &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
}
-void OperationValidator::visit(const ir::operation::Floor &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::HashtableLookup &node)
{
const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
OP_REQUIRES(ofm_shape.rank() <= 4);
}
-void OperationValidator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
-
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(input_index).shape() == _ctx.at(output_index).shape());
-}
-
void OperationValidator::visit(const ir::operation::DepthToSpace &node)
{
// param check
OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
}
-void OperationValidator::visit(const ir::operation::Min &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::Max &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
void OperationValidator::visit(const ir::operation::Select &node)
{
const auto output_index{node.getOutputs().at(0)};
OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
}
-void OperationValidator::visit(const ir::operation::Cos &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Sin &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::RSQRT &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::Shape &node)
{
const auto output_index{node.getOutputs().at(0)};
// TODO Add to validate with subgraphs
}
-void OperationValidator::visit(const ir::operation::Neg &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Log &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::LogicalNot &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::SquaredDifference &node)
{
const auto output_index{node.getOutputs().at(0)};
OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
}
-void OperationValidator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(0)};
- const auto rhs_index{node.getInputs().at(1)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
void OperationValidator::visit(const ir::operation::Range &node)
{
const auto output_index{node.getOutputs().at(0)};
OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
}
-void OperationValidator::visit(const ir::operation::Quantize &node)
-{
- VERBOSE(Quantize) << "Configure Quantize operation" << std::endl;
-
- OP_REQUIRES(node.getInputs().size() == 1);
- OP_REQUIRES(node.getOutputs().size() == 1);
-
- const auto input_index{node.getInputs().at(0)};
- const auto output_index{node.getOutputs().at(0)};
-
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
} // namespace compiler
} // namespace onert
void operator()();
public:
- void visit(const ir::operation::Abs &node) override;
- void visit(const ir::operation::AvgPool2D &node) override;
void visit(const ir::operation::BatchMatMul &node) override;
void visit(const ir::operation::BatchToSpaceND &node) override;
- void visit(const ir::operation::Cast &node) override;
void visit(const ir::operation::Comparison &node) override;
void visit(const ir::operation::Softmax &node) override;
void visit(const ir::operation::InstanceNorm &node) override;
void visit(const ir::operation::Permute &node) override;
+ void visit(const ir::operation::Pool2D &node) override;
void visit(const ir::operation::Reduce &node) override;
void visit(const ir::operation::Transpose &node) override;
void visit(const ir::operation::RNN &node) override;
- void visit(const ir::operation::Round &node) override;
void visit(const ir::operation::SpaceToBatchND &node) override;
void visit(const ir::operation::SpaceToDepth &node) override;
+ void visit(const ir::operation::ElementwiseActivation &node) override;
+ void visit(const ir::operation::ElementwiseBinary &node) override;
+ void visit(const ir::operation::ElementwiseUnary &node) override;
void visit(const ir::operation::EmbeddingLookup &node) override;
- void visit(const ir::operation::Exp &node) override;
void visit(const ir::operation::ExpandDims &node) override;
- void visit(const ir::operation::Floor &node) override;
void visit(const ir::operation::HashtableLookup &node) override;
void visit(const ir::operation::TransposeConv &node) override;
void visit(const ir::operation::Gather &node) override;
- void visit(const ir::operation::Dequantize &node) override;
void visit(const ir::operation::DepthToSpace &node) override;
void visit(const ir::operation::Pack &node) override;
void visit(const ir::operation::LSTM &node) override;
void visit(const ir::operation::L2Normalization &node) override;
void visit(const ir::operation::Unpack &node) override;
void visit(const ir::operation::Pad &node) override;
- void visit(const ir::operation::Min &node) override;
- void visit(const ir::operation::Max &node) override;
void visit(const ir::operation::Select &node) override;
void visit(const ir::operation::StridedSlice &node) override;
void visit(const ir::operation::Split &node) override;
- void visit(const ir::operation::Cos &node) override;
- void visit(const ir::operation::Sin &node) override;
- void visit(const ir::operation::RSQRT &node) override;
void visit(const ir::operation::Shape &node) override;
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &node) override;
void visit(const ir::operation::If &node) override;
void visit(const ir::operation::While &node) override;
- void visit(const ir::operation::Neg &node) override;
- void visit(const ir::operation::Log &node) override;
- void visit(const ir::operation::LogicalNot &node) override;
void visit(const ir::operation::SquaredDifference &node) override;
void visit(const ir::operation::Tile &node) override;
- void visit(const ir::operation::LogicalOr &node) override;
void visit(const ir::operation::Range &node) override;
void visit(const ir::operation::MatrixBandPart &node) override;
void visit(const ir::operation::LogSoftmax &node) override;
- void visit(const ir::operation::Quantize &node) override;
private:
void checkUnaryOp(const ir::Operation &node);
namespace compiler
{
+bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
+{
+ bool has_dynamic_tensor = false;
+
+ for (const auto &operation_idx : op_seq.operations())
+ {
+ auto &op = _operations.at(operation_idx);
+ auto opcode = op.opcode();
+
+ _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
+
+ // IF: need shape inference for then, else
+ // While: need shape inference for condition, body
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+ {
+ op.accept(*this);
+ }
+ else
+ {
+ _return_has_dynamic_tensor = checkDynamicInput(op);
+
+ if (_return_has_dynamic_tensor)
+ {
+ setDynamicOutput(op);
+ }
+ else
+ {
+ op.accept(*this);
+ }
+ }
+
+ has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
+ }
+
+ return has_dynamic_tensor;
+}
+
+bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
+{
+ for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+ {
+ if (_operands.at(input_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
+{
+ for (auto output_idx : op.getOutputs())
+ {
+ _operands.at(output_idx).info().setDynamic();
+ }
+}
+
void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (lhs.info().isDynamic() || rhs.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
output.info().shape(new_shape);
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape = input.info().shape();
output.info().shape(new_shape);
}
}
-void StaticShapeInferer::visit(const ir::operation::Abs &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Add &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
- op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
{
const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
-
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.info().shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
const auto lhs = _operands.at(lhs_index);
const auto rhs = _operands.at(rhs_index);
auto &output = _operands.at(output_index);
-
- if (lhs.info().isDynamic() || rhs.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+ op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+{
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic.
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
const auto &shape = _operands.at(shape_idx);
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Cast &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Comparison &op)
{
handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
{
const auto input_idx{op.getInputs().at(i)};
const auto &input = _operands.at(input_idx);
-
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
input_shapes.emplace_back(input.shape());
}
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic() || ker.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape =
shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Cos &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
}
-void StaticShapeInferer::visit(const ir::operation::Div &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
- op.getInputs().at(ir::operation::Div::Input::RHS));
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+ op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
}
-void StaticShapeInferer::visit(const ir::operation::Exp &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
}
void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!axis.isConstant())
{
output.info().setDynamic();
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!input.isConstant())
{
output.info().setDynamic();
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
-
- // if input or ker is dynamic, output also becomes dynamic
- if (input.info().isDynamic() || ker.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape =
shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
const auto &indices = _operands.at(indices_idx);
-
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic() || indices.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.info().shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
}
}
-void StaticShapeInferer::visit(const ir::operation::Log &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
- op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Logistic &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
}
-void StaticShapeInferer::visit(const ir::operation::Max &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
- op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Min &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
- op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Mul &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
- op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Neg &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::OneHot &op)
{
const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (indice.info().isDynamic() || depth.info().isDynamic() || !depth.isConstant())
+ if (!depth.isConstant())
{
output.info().setDynamic();
_return_has_dynamic_tensor = true;
void StaticShapeInferer::visit(const ir::operation::Pack &op)
{
- bool is_any_of_inputs_dynamic = [&]() -> bool {
- for (uint32_t i = 0; i < op.getInputs().size(); ++i)
- {
- const auto &input = _operands.at(op.getInputs().at(i));
- if (input.info().isDynamic())
- {
- return true;
- }
- }
- return false;
- }();
-
const auto input_idx{op.getInputs().at(0)};
const auto &input = _operands.at(input_idx);
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (is_any_of_inputs_dynamic)
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.shape().rank() + 1;
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
const auto num = op.param().num;
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic or pad is dynamic, output also becomes dynamic
- if (input.info().isDynamic() || pad.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// if pad is not constant, output also becomes dynamic
if (!pad.isConstant())
{
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
// Permute is a special operation that layouts of input/output may be different on backend
// However, it is not applied here, so input/output have the same layout of frontend. Because
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if any input is dynamic, output also becomes dynamic
- if (start_op.info().isDynamic() || limit_op.info().isDynamic() || delta_op.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
ir::Shape new_shape;
if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
std::vector<int32_t> axes_vec;
for (size_t i = 0; i < axes.shape().num_elements(); ++i)
{
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// New shape is given by second input tensor
if (op.getInputs().size() == 2)
{
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Shape inferencing logic based on Params
ir::Shape new_shape = shape_inference::inferResizeBilinearShape(
input.shape(), op.param().height_out, op.param().width_out);
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
}
-void StaticShapeInferer::visit(const ir::operation::Round &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::RSQRT &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Select &op)
{
const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input_cond.info().isDynamic() || input_true.info().isDynamic() ||
- input_false.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Select output shpae
ir::Shape new_shape = shape_inference::inferSelectShape(
input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape output_shape;
output_shape.append(input.info().shape().rank());
output.info().shape(output_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Sin &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Slice &op)
{
const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
const auto output_index = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_index);
- if (input.info().isDynamic() || begins.info().isDynamic() || sizes.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(begins.isConstant() && sizes.isConstant()))
{
const auto &block_shape = _operands.at(block_shape_idx);
const auto &padding = _operands.at(padding_idx);
- if (input.info().isDynamic() || block_shape.info().isDynamic() || padding.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(block_shape.isConstant() && padding.isConstant()))
{
const auto axis = op.param().axis;
const auto num_splits = op.param().num_splits;
- if (input.info().isDynamic())
- {
- for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().setDynamic();
- }
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.info().shape().rank();
auto axis_resolved = axis < 0 ? axis + rank : axis;
const auto output_index = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_index);
- if (input.info().isDynamic() || starts.info().isDynamic() || ends.info().isDynamic() ||
- strides.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
{
output.info().setDynamic();
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Sub &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS),
- op.getInputs().at(ir::operation::Sub::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Tanh &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Tile &op)
{
const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!multiplier.isConstant())
{
output.info().setDynamic();
ir::Operand &output = _operands.at(output_idx);
const auto perm{op.param().perm};
// const auto rank{op.param().rank};
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
+
// set output shape, based on input and params
ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm);
output.info().shape(new_shape);
const auto input_idx{op.getInputs().at(0)};
const auto &input = _operands.at(input_idx);
const auto num = op.param().num;
-
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().setDynamic();
- }
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
}
}
-void StaticShapeInferer::visit(const ir::operation::ZerosLike &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::Input::INPUT));
-}
-
} // namespace compiler
} // namespace onert
return _cf_tensor_builder;
}
- std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind)
- {
- for (auto &tensor_builder : _tensor_builders)
- {
- auto tensor = tensor_builder->tensorAt(ind);
- if (tensor)
- return tensor;
- }
- return nullptr;
- }
-
private:
std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__
+#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__
+
+#include <unordered_set>
+#include <memory>
+#include "backend/BackendContext.h"
+#include "backend/Backend.h"
+#include "backend/controlflow/Config.h"
+#include "backend/controlflow/TensorBuilder.h"
+#include "backend/controlflow/TensorRegistry.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Set of the tensor registries of all backends in use.
+ *
+ * The controlflow backend's registry is additionally kept in a dedicated member
+ * so callers can reach it directly; getITensor() offers a backend-agnostic
+ * lookup across every registered registry.
+ */
+class TensorRegistries
+{
+public:
+ TensorRegistries() = default;
+
+ /**
+ * @brief Collect the tensor registry of each backend context.
+ * @param backend_contexts Backend contexts to take the registries from
+ * @param include_controlflow If true, the controlflow backend's registry also
+ * joins the iterable set (begin()/end()); it is always reachable
+ * through getControlflowTensorRegistry() regardless.
+ */
+ TensorRegistries(const onert::backend::BackendContexts &backend_contexts,
+ bool include_controlflow)
+ {
+ for (const auto &e : backend_contexts)
+ {
+ auto tensor_reg = e.second->tensor_registry;
+ if (e.first->config()->id() == backend::controlflow::Config::ID)
+ {
+ // Remember the controlflow registry separately (downcast to its concrete type)
+ _cf_tensor_reg =
+ std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg);
+ if (include_controlflow)
+ _tensor_regs.insert(tensor_reg);
+ }
+ else
+ {
+ _tensor_regs.insert(tensor_reg);
+ }
+ }
+ }
+
+ // Iteration over the collected registries (const only)
+ std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator begin() const
+ {
+ return _tensor_regs.cbegin();
+ }
+ std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator end() const
+ {
+ return _tensor_regs.cend();
+ }
+
+ std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const
+ {
+ return _cf_tensor_reg;
+ }
+
+ /**
+ * @brief Find a tensor by operand index across all registered backends.
+ * @param ind Operand index to look up
+ * @return The matching tensor, or nullptr if no registry contains it
+ */
+ std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const
+ {
+ for (auto &tensor_reg : _tensor_regs)
+ {
+ auto tensor = tensor_reg->getITensor(ind);
+ if (tensor)
+ return tensor;
+ }
+ return nullptr;
+ }
+
+private:
+ std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs;
+ std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TENSOR_REGISTRIES_H__
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation &node)
+void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
const auto backend = op_seq_lower_info->backend();
const auto layout = op_seq_lower_info->layout();
- const auto factor = operand::PermuteFactor{backend, layout};
+ const auto factor = ir::operand::PermuteFactor{backend, layout};
- for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
auto new_object = object;
new_object.unsetDef();
// TODO Remove const_case
- const_cast<OperationIndexSet &>(new_object.getUses()).clear();
+ const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
const auto new_index = _graph.operands().emplace(new_object);
_replace_operands_map[key] = new_index;
}
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
-#define __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
#include <ir/operand/PermuteFactor.h>
#include <ir/Index.h>
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
std::string id() final { return "ConstantInsertionPass"; }
public:
- void callback(const OperationIndex &index, Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::Operation &node) final;
private:
struct ReplaceKey
{
- OperandIndex index;
- operand::PermuteFactor factor;
+ ir::OperandIndex index;
+ ir::operand::PermuteFactor factor;
bool operator==(const ReplaceKey &other) const
{
std::size_t operator()(const ReplaceKey &key) const noexcept
{
using std::hash;
- return hash<OperandIndex>()(key.index) ^ (hash<operand::PermuteFactor>()(key.factor) << 1);
+ return hash<ir::OperandIndex>()(key.index) ^
+ (hash<ir::operand::PermuteFactor>()(key.factor) << 1);
}
};
- std::unordered_map<ReplaceKey, OperandIndex, KeyHasher> _replace_operands_map;
+ std::unordered_map<ReplaceKey, ir::OperandIndex, KeyHasher> _replace_operands_map;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation &node)
+void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
const auto backend = op_seq_lower_info->backend();
const auto layout = op_seq_lower_info->layout();
- const auto factor = operand::PermuteFactor{backend, layout};
+ const auto factor = ir::operand::PermuteFactor{backend, layout};
// Now this runtime does not support the node making output of operation as constant
- for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
if (object.isConstant())
{
// All constant operands are already assigned to each backend by ConstantInsertionPass, so a
// constant has `def` and `use` as the same PermuteFactor
- _lowered_graph.setLowerInfo(input, std::make_unique<operand::LowerInfo>());
+ _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>());
_lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor);
_lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor);
}
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
-#define __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
#include <ir/Index.h>
#include "LoweredOperationPass.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
std::string id() final { return "ConstantLoweringPass"; }
public:
- void callback(const OperationIndex &index, Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::Operation &node) final;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
+#endif // __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
#define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
#include "OperandPass.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
class LoweredOperandPass : public OperandPass
{
public:
- LoweredOperandPass(ir::LoweredGraph &lowered_graph)
+ LoweredOperandPass(compiler::LoweredGraph &lowered_graph)
: OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
virtual ~LoweredOperandPass() = default;
std::string id() override = 0;
- void callback(const OperandIndex &i, Operand &o) override = 0;
+ void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0;
protected:
- ir::LoweredGraph &_lowered_graph;
+ compiler::LoweredGraph &_lowered_graph;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
#endif // __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
#define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
#include "OperationPass.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
class LoweredOperationPass : public OperationPass
{
public:
- LoweredOperationPass(ir::LoweredGraph &lowered_graph)
+ LoweredOperationPass(LoweredGraph &lowered_graph)
: OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
virtual ~LoweredOperationPass() = default;
std::string id() override = 0;
- void callback(const OperationIndex &i, Operation &o) override = 0;
+ void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0;
protected:
- ir::LoweredGraph &_lowered_graph;
+ LoweredGraph &_lowered_graph;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
#endif // __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
void OperandPass::run()
{
_graph.operands().iterate(
- [&](const OperandIndex &index, Operand &object) { callback(index, object); });
+ [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_OPERAND_PASS_H__
-#define __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_OPERAND_PASS_H__
+#define __ONERT_COMPILER_PASS_OPERAND_PASS_H__
#include "Pass.h"
#include "ir/Index.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
public:
std::string id() override = 0;
void run() override final;
- virtual void callback(const OperandIndex &i, Operand &o) = 0;
+ virtual void callback(const ir::OperandIndex &i, ir::Operand &o) = 0;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+#endif // __ONERT_COMPILER_PASS_OPERAND_PASS_H__
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
void OperationPass::run()
{
_graph.operations().iterate(
- [&](const OperationIndex &index, Operation &node) { callback(index, node); });
+ [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
* @brief This file contains OperationPass class
*/
-#ifndef __ONERT_GRAPH_PASS_OPERATION_PASS_H__
-#define __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_OPERATION_PASS_H__
+#define __ONERT_COMPILER_PASS_OPERATION_PASS_H__
#include "Pass.h"
#include "ir/Index.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
* @param index is the index of a node in graph
* @param node is the node in graph
*/
- virtual void callback(const OperationIndex &index, Operation &node) = 0;
+ virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0;
/**
* @brief Run the pass
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_OPERATION_PASS_H__
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_PASS_H__
-#define __ONERT_GRAPH_PASS_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PASS_H__
+#define __ONERT_COMPILER_PASS_PASS_H__
#include <string>
namespace ir
{
class Graph;
-} // namespace ir
+} // namespace compiler
} // namespace onert
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
class Pass
{
public:
- Pass(Graph &graph) : _graph{graph} {}
+ Pass(ir::Graph &graph) : _graph{graph} {}
virtual ~Pass() = default;
public:
virtual void run() = 0;
protected:
- Graph &_graph;
+ ir::Graph &_graph;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PASS_H__
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void PermutationEliminationPass::callback(const OperationIndex &ind, Operation &node)
+void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node)
{
_op_ind = ind;
node.accept(*this);
};
-void PermutationEliminationPass::visit(const operation::Permute &node)
+void PermutationEliminationPass::visit(const ir::operation::Permute &node)
{
auto in_operand = node.getInputs().at(0);
auto out_operand = node.getOutputs().at(0);
- // Check if two tensors are both portable
- // TODO Make this general, this is just a workaround to check two tensors are portable
+ // Check if two tensors are both portable; if not, we cannot eliminate the node
{
auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
- auto in_backend_id = in_def_factor.backend()->config()->id();
- auto out_backend_id = out_def_factor.backend()->config()->id();
+ auto in_config = in_def_factor.backend()->config();
+ auto out_config = out_def_factor.backend()->config();
- // TODO Fix this workaround that removes only Permute between cpu and controlflow backend.
- // This should be general.
- if (!((in_backend_id == backend::controlflow::Config::ID && out_backend_id == "cpu") ||
- (in_backend_id == "cpu" && out_backend_id == backend::controlflow::Config::ID)))
+ // FIXME Supporting dynamic tensor does not exactly mean those are portable.
+ // It may need to have another config option for checking if each uses `IPortableTensor`.
+ if (!(in_config->supportDynamicTensor() && out_config->supportDynamicTensor()))
return;
}
if (!op_seq.getOutputs().contains(in_operand))
return;
// Update OpSequence/Operation edges and Operand edges
op_seq.replaceOutputs(in_operand, out_operand);
for (auto op : op_seq.operations())
{
});
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl;
VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl;
}
else
{
}
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl;
VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl;
}
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
#include "ir/OperationVisitor.h"
#include "LoweredOperationPass.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
* @note This is an optimization pass which means that everything should work fine even if this pass
* was skipped.
*/
-class PermutationEliminationPass : public LoweredOperationPass, public OperationVisitor
+class PermutationEliminationPass : public LoweredOperationPass, public ir::OperationVisitor
{
public:
using LoweredOperationPass::LoweredOperationPass;
std::string id() final { return "PermutationEliminationPass"; }
public:
- void callback(const OperationIndex &i, Operation &n) final;
+ void callback(const ir::OperationIndex &i, ir::Operation &n) final;
private:
- void visit(const operation::Permute &) final;
+ void visit(const ir::operation::Permute &) final;
private:
ir::OperationIndex _op_ind;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void PermutationInsertionPass::callback(const OperandIndex &index, Operand &object)
+void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object)
{
auto &&operand_li = _lowered_graph.getLowerInfo(index);
assert(operand_li);
return;
}
- std::list<OperationIndex> permute_indexes;
+ std::list<ir::OperationIndex> permute_indexes;
// Build a map for all necessary type of operands
- std::unordered_map<operand::PermuteFactor, OperandIndex> factor_to_index;
+ std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index;
{
assert(operand_li->def_factors().size() == 1);
for (auto factor : operand_li->def_factors())
// Update operations' input that uses this operand
{
- std::list<OperationIndex> remove_list;
+ std::list<ir::OperationIndex> remove_list;
auto uses = object.getUses();
for (auto use : uses)
}
}
-OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &operand_index,
- const operand::PermuteFactor &factor)
+ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index,
+ const ir::operand::PermuteFactor &factor)
{
assert(!_graph.isBuildingPhase());
auto output_backend = factor.backend();
// NOTE Permute may not have specific layout because the layout of input and output may be
// different.
- const auto permute_node_layout = Layout::UNKNOWN;
+ const auto permute_node_layout = ir::Layout::UNKNOWN;
// NOTE If one backend supports several layout, the backend must support Permute operation
const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow();
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
}
- const operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+ const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
// Update LowerInfo of input operand
auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index);
operand_lower_info->addUsePermuteFactor(permute_node_factor);
// Update LowerInfo of output operand
- auto out_operand_li = std::make_unique<operand::LowerInfo>();
+ auto out_operand_li = std::make_unique<ir::operand::LowerInfo>();
// The input and output factors of all nodes will be the same except Permute. So Tensor's
// allocators allocates memory using only the information of def permutation factor now.
// Insert permute operation to the graph
const auto input_layout = input_factor.layout();
const auto output_layout = factor.layout();
- using Permute = operation::Permute;
+ using Permute = ir::operation::Permute;
const auto permute_type = [&]() {
- if (input_layout == Layout::NHWC && output_layout == Layout::NCHW)
+ if (input_layout == ir::Layout::NHWC && output_layout == ir::Layout::NCHW)
{
return Permute::Type::NHWC_TO_NCHW;
}
- else if (input_layout == Layout::NCHW && output_layout == Layout::NHWC)
+ else if (input_layout == ir::Layout::NCHW && output_layout == ir::Layout::NHWC)
{
return Permute::Type::NCHW_TO_NHWC;
}
auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index);
op_seq.setInputs(node.getInputs());
op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<operation::LowerInfo>(
+ _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>(
permute_node_backend, permute_node_layout));
}
return node_index;
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
#include "LoweredOperandPass.h"
#include "compiler/BackendManager.h"
-#include "ir/Operand.h" //for OperationIndex
+#include "ir/Operand.h"
#include "ir/operand/PermuteFactor.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
public:
std::string id() override { return "PermutationInsertionPass"; }
- void callback(const OperandIndex &index, Operand &object) override;
+ void callback(const ir::OperandIndex &index, ir::Operand &object) override;
private:
/**
* @param operand_index is the target operand index for the insertion
* @param factor is the output operand's backend type and layout
*
- * @return OperationIndex
+ * @return ir::OperationIndex
*/
- OperationIndex insertPermute(const OperandIndex &operand_index,
- const operand::PermuteFactor &factor);
+ ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index,
+ const ir::operand::PermuteFactor &factor);
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
+using namespace ir;
+
void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
{
node.accept(*this);
"operand used in more than one node");
// TODO remove const_cast later. For example, _ctx may need to be a non const variable or
// a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(operand.shape()).extendRank(expanded_rank);
+ const_cast<Shape &>(operand.shape()).extendRank(expanded_rank);
}
}
}
const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
_lowered_graph.setLowerInfo(
next_op_seq_index,
- std::make_unique<operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
+ std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
}
}
auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index);
new_op_seq.setInputs(node.getInputs());
new_op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(new_op_seq_index,
- std::make_unique<operation::LowerInfo>(backend, frontend_layout));
+ _lowered_graph.setLowerInfo(
+ new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout));
}
// Change PermuteFactors of operands of target node
const auto backend = op_seq_li->backend();
const operand::PermuteFactor removed_factor{backend, backend_layout};
const operand::PermuteFactor new_factor{backend, frontend_layout};
- for (const auto &input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
bool canRemove = true;
for (const auto &use : _graph.operands().at(input).getUses())
}
}
-void PermutationOperationPass::visit(const operation::Add &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::BinaryArithmetic &node)
+{
+ applyExpandRanks(node);
+}
-void PermutationOperationPass::visit(const operation::Concat &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::Concat &node) { applyExpandRanks(node); }
-void PermutationOperationPass::visit(const operation::Comparison &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::Comparison &node)
+{
+ applyExpandRanks(node);
+}
-void PermutationOperationPass::visit(const operation::Div &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::ElementwiseBinary &node)
+{
+ applyExpandRanks(node);
+}
-void PermutationOperationPass::visit(const operation::FullyConnected &node)
+void PermutationOperationPass::visit(const ir::operation::ElementwiseUnary &node)
{
- const auto &input_ind = node.getInputs().at(operation::FullyConnected::Input::INPUT);
+ applyExpandRanks(node);
+}
+
+void PermutationOperationPass::visit(const ir::operation::FullyConnected &node)
+{
+ const auto &input_ind = node.getInputs().at(ir::operation::FullyConnected::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
}
}
-void PermutationOperationPass::visit(const operation::Gather &node)
+void PermutationOperationPass::visit(const ir::operation::Gather &node)
{
- const auto &input_ind = node.getInputs().at(operation::Gather::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Gather::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
}
}
-void PermutationOperationPass::visit(const operation::LogicalAnd &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::LogicalNot &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::LogicalOr &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Max &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Min &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Mul &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Pack &node)
+void PermutationOperationPass::visit(const ir::operation::Pack &node)
{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
}
}
-void PermutationOperationPass::visit(const operation::PReLU &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::PReLU &node) { applyExpandRanks(node); }
-void PermutationOperationPass::visit(const operation::Reshape &node)
+void PermutationOperationPass::visit(const ir::operation::Reshape &node)
{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
}
}
-void PermutationOperationPass::visit(const operation::SquaredDifference &node)
+void PermutationOperationPass::visit(const ir::operation::SquaredDifference &node)
{
applyExpandRanks(node);
}
-void PermutationOperationPass::visit(const operation::Sub &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Unpack &node)
+void PermutationOperationPass::visit(const ir::operation::Unpack &node)
{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
+
+#include "ir/OperationVisitor.h"
+#include "LoweredOperationPass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Pass that visits each lowered operation and adjusts operand layout/rank
+ * for the listed operation types (see the visit() overloads below).
+ *
+ * callback() dispatches the node to the matching visit() overload via the
+ * ir::OperationVisitor interface; operations without an overload are untouched.
+ */
+class PermutationOperationPass : public LoweredOperationPass, public ir::OperationVisitor
+{
+public:
+ using LoweredOperationPass::LoweredOperationPass;
+
+public:
+ std::string id() final { return "PermutationOperationPass"; }
+
+public:
+ // Dispatches the node to the visit() overloads below
+ void callback(const ir::OperationIndex &i, ir::Operation &n) final;
+
+public:
+ void visit(const ir::operation::BinaryArithmetic &) final;
+ void visit(const ir::operation::Comparison &) final;
+ void visit(const ir::operation::Concat &) final;
+ void visit(const ir::operation::ElementwiseBinary &) final;
+ void visit(const ir::operation::ElementwiseUnary &) final;
+ void visit(const ir::operation::Pack &) final;
+ void visit(const ir::operation::PReLU &) final;
+ void visit(const ir::operation::SquaredDifference &) final;
+ void visit(const ir::operation::Unpack &) final;
+ void visit(const ir::operation::FullyConnected &) final;
+ void visit(const ir::operation::Gather &) final;
+ void visit(const ir::operation::Reshape &) final;
+
+private:
+ // Expands the rank of the node's operands so they match (used by the
+ // broadcast-capable ops above)
+ void applyExpandRanks(const ir::Operation &);
+ // Re-lowers the node so it keeps the frontend layout instead of the
+ // backend layout
+ void changeToKeepLayout(const ir::Operation &);
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
*/
#include "ir/Graph.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__
#define __ONERT_DUMPER_DOT_DOT_DUMPER_H__
: _lowered_graph{nullptr}, _graph(graph), _level{level}
{
}
- DotDumper(const ir::LoweredGraph *lowered_graph, Level level)
+ DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
: _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
{
}
void dump(const std::string &tag);
private:
- const ir::LoweredGraph *_lowered_graph;
+ const compiler::LoweredGraph *_lowered_graph;
const ir::Graph &_graph;
Level _level;
};
}
DataflowExecutor::DataflowExecutor(
- std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders},
+ const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
+ compiler::CodeMap &&code_map)
+ : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs)},
_code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
* @param tensor_builders Tensor builders that are currently used
* @param code_map OpSequence and its code map
*/
- DataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
void executeImpl() override;
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Abs &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Add &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
- op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
{
const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
}
+void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+ op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+
void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
{
auto output_ind = op.getOutputs().at(0);
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Cast &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Comparison &op)
{
handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Cos &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT));
}
-void DynamicShapeInferer::visit(const ir::operation::Div &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
- op.getInputs().at(ir::operation::Div::Input::RHS));
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+ op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
}
-void DynamicShapeInferer::visit(const ir::operation::Exp &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
}
void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Log &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
- op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Logistic &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
}
-void DynamicShapeInferer::visit(const ir::operation::Max &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
- op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Min &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
- op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Mul &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
- op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Neg &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
{
auto output_ind = op.getOutputs().at(0);
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
}
assert(output->buffer() != nullptr);
}
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
}
-void DynamicShapeInferer::visit(const ir::operation::Round &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::RSQRT &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Select &op)
{
const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Sin &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Slice &op)
{
const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Sub &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS),
- op.getInputs().at(ir::operation::Sub::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Tanh &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Tile &op)
{
auto output_ind = op.getOutputs().at(0);
}
}
-void DynamicShapeInferer::visit(const ir::operation::ZerosLike &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::INPUT));
-}
-
} // namespace exec
} // namespace onert
if (_io_desc.inputs.at(index.value()) != 0)
throw std::runtime_error("Error in calling order");
- _io_desc.input_shape_signature[index] = new_shape;
+  // Recorded here and used later to mark the input tensor as dynamic.
+  // Note that the 'compiled' model is not updated with new_shape;
+  // new_shape changes the model input shape only while 'running' the model.
+ _io_desc.dynamic_input_shapes[index] = new_shape;
}
// TODO Remove default parameter
// if input_shape_sig is set, input_shape_sig overrides shape in info
// note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
{
- auto input_shape_sig = _io_desc.input_shape_signature.find(index);
- auto size_required = (input_shape_sig != _io_desc.input_shape_signature.end())
+ auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index);
+ auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end())
? input_shape_sig->second.num_elements() *
onert::ir::sizeOfDataType(info.typeInfo().type())
: info.total_size();
ir::Shape Execution::getInputShape(ir::IOIndex ind) const
{
- auto itr = _io_desc.input_shape_signature.find(ind);
- if (itr == _io_desc.input_shape_signature.end())
+ auto itr = _io_desc.dynamic_input_shapes.find(ind);
+ if (itr == _io_desc.dynamic_input_shapes.end())
{
auto operand_idx = primary_subgraph().getInputs().at(ind.value());
return primary_subgraph().operands().at(operand_idx).shape();
namespace exec
{
-ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders)
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs)
: _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
+ _input_tensors{input_tensors}, _output_tensors{output_tensors},
+ _tensor_mgrs{std::move(tensor_mgrs)}, _mutex()
{
// TODO Fix the way of knowing whether it is primary or not
bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
std::vector<std::shared_ptr<backend::ITensor>> list;
for (auto ind : ind_seq)
{
- std::shared_ptr<backend::ITensor> tensor;
- for (auto &tensor_builder : tensor_builders)
- {
- auto tensor_registry = tensor_builder->tensorRegistry();
- assert(tensor_registry);
- tensor = tensor_registry->getNativeITensor(ind);
- if (tensor != nullptr)
- {
- if (tensor_builder->supportDynamicTensor())
- {
- DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
- _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- break;
- }
- }
+ std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
+ DynAllocInfo dyn_alloc_info{ind};
+ _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
list.push_back(tensor);
}
return list;
std::vector<std::shared_ptr<backend::ITensor>> list;
for (auto ind : ind_seq)
{
- std::shared_ptr<backend::ITensor> tensor;
- for (auto &tensor_builder : tensor_builders)
- {
- auto tensor_registry = tensor_builder->tensorRegistry();
- assert(tensor_registry);
- tensor = tensor_registry->getNativeITensor(ind);
- if (tensor != nullptr)
- {
- if (tensor_builder->supportDynamicTensor())
- {
- DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
- _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- break;
- }
- }
+ std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
+ DynAllocInfo dyn_alloc_info{ind};
+ _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
list.push_back(tensor);
}
return list;
}
else
{
- // If primary graph, all the inputs and outputs belong to controlflow backend
- auto cf_dyn_tensor_builder = tensor_builders.getControlflowTensorBuilder();
- assert(cf_dyn_tensor_builder);
-
assert(input_tensors.size() == _graph.getInputs().size());
assert(output_tensors.size() == _graph.getOutputs().size());
for (uint32_t i = 0; i < input_tensors.size(); i++)
{
auto tensor = input_tensors[i];
auto ind = _graph.getInputs().at(i);
- DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
+ DynAllocInfo dyn_alloc_info{ind};
_input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
}
for (uint32_t i = 0; i < output_tensors.size(); i++)
{
auto tensor = output_tensors[i];
auto ind = _graph.getOutputs().at(i);
- DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
+ DynAllocInfo dyn_alloc_info{ind};
_output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
}
}
-
- // Prepare each TensorManager on each backend
- for (auto &tensor_builder : tensor_builders)
- {
- auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
- if (s_tensor_manager != nullptr)
- _tensor_mgrs.insert(std::move(s_tensor_manager));
-
- if (tensor_builder->supportDynamicTensor())
- {
- auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
- if (d_tensor_manager != nullptr)
- _tensor_mgrs.insert(std::move(d_tensor_manager));
- }
- }
}
void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
// TODO Remove dynamic_cast
auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
assert(tensor);
- auto input_shape = desc.input_shape_signature.find(ir::IOIndex{i});
- if (input_shape != desc.input_shape_signature.end())
+ auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
+ if (input_shape != desc.dynamic_input_shapes.end())
{
tensor->set_dynamic();
tensor->setShape(input_shape->second);
*/
void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
{
- auto shape_sig_found = desc.input_shape_signature.find(io_ind);
- if (shape_sig_found != desc.input_shape_signature.end())
+ auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
+ if (shape_sig_found != desc.dynamic_input_shapes.end())
{
auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
auto changed_input_shape = shape_sig_found->second;
auto operand_ind = dyn_alloc_info->second.ind;
- dyn_alloc_info->second.dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
+ auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager();
+ assert(dyn_tensor_manager);
+ dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
}
}
#include "Sink.h"
#include "ShapeConverter.h"
#include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include "ir/LowerInfoMap.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
#include "exec/IFunction.h"
#include "backend/IDynamicTensorManager.h"
#include "backend/ITensorManager.h"
-#include "backend/ITensorBuilder.h"
#include "exec/ExecutionObservee.h"
-#include "compiler/TensorBuilders.h"
+#include "compiler/TensorRegistries.h"
#include <list>
namespace onert
* @param graph Graph object
* @param tensor_builders Tensor builders that are currently used
*/
- ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+ ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders);
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs);
virtual ~ExecutorBase() = default;
protected:
ExecutionObservee _subject;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- std::unique_ptr<ir::LoweredGraph> _lowered_graph;
+ std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
const ir::Graph &_graph;
std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
void FunctionSequence::run()
{
- if (_enable_dynamic_shape_inferer)
+  // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is not set
+ if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx)
{
if (_dynamic_tensor_ctx->op_seq->size() != _functions.size())
throw std::runtime_error("operation and functions should be mapped one by one");
* @param tensor_builders Tensor builders that are currently used
* @param code_map OpSequence and its code map
*/
- LinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map,
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map,
const std::vector<ir::OpSequenceIndex> &order)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders}
+ : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs)}
{
for (auto index : order)
{
}
ParallelExecutor::ParallelExecutor(
- std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map)
- : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders,
- std::move(code_map)}
+ const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
+ compiler::CodeMap &&code_map)
+ : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map)}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
* @param tensor_builders Tensor builders that are currently used
* @param code_map OpSequence and its code map
*/
- ParallelExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
void executeImpl() override;
namespace nchw
{
-template <typename T> class Reader final : public feature::Reader<T>
+template <typename T> class Reader : public feature::Reader<T>
{
public:
// Construct for buffer of model inputs
}
public:
- T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const final
{
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
+ return getRef(batch, ch, row, col);
}
- T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
+ T at(uint32_t ch, uint32_t row, uint32_t col) const final { return getRef(0, ch, row, col); }
+
+protected:
+ const T &getRef(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
{
const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
#ifndef __ONERT_EXEC_FEATURE_NCHW_VIEW_H__
#define __ONERT_EXEC_FEATURE_NCHW_VIEW_H__
-#include "../Reader.h"
+#include "Reader.h"
#include "backend/ITensor.h"
#include "ir/Shape.h"
namespace nchw
{
-template <typename T> class View final : public feature::Reader<T>
+template <typename T> class View final : public Reader<T>
{
public:
// Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+ View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
{
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- _strides.W = sizeof(T);
- _strides.H = shape.W * sizeof(T);
- _strides.C = shape.W * shape.H * sizeof(T);
- _strides.N = shape.W * shape.H * shape.C * sizeof(T);
+ // DO NOTHING
}
// Construct for backend tensor
- View(::onert::backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
- {
- assert(tensor->layout() == ir::Layout::NCHW);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.W = tensor->dimension(3);
- _shape.H = tensor->dimension(2);
- _shape.C = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
- }
-
-public:
- T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ View(::onert::backend::ITensor *tensor) : Reader<T>{tensor}
{
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
+ // DO NOTHING
}
public:
- T &at(uint32_t ch, uint32_t row, uint32_t col)
- {
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
+ using Reader<T>::at;
T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
{
- const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
+ return const_cast<T &>(Reader<T>::getRef(batch, ch, row, col));
}
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
+ T &at(uint32_t ch, uint32_t row, uint32_t col)
{
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.C > ch); // shape.C > ch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += ch * _strides.C;
- res += row * _strides.H;
- res += col * _strides.W;
-
- return res;
+ return const_cast<T &>(Reader<T>::getRef(0, ch, row, col));
}
-
-private:
- // TODO Remove _shape
- ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
- Strides _strides;
- uint8_t *_ptr;
- size_t _len;
};
} // namespace nchw
namespace nhwc
{
-template <typename T> class Reader final : public feature::Reader<T>
+template <typename T> class Reader : public feature::Reader<T>
{
public:
// Construct for buffer of model inputs
}
public:
- T at(uint32_t row, uint32_t col, uint32_t ch) const override
+ T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const final
{
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
+ return getRef(batch, row, col, ch);
}
- T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
+ T at(uint32_t row, uint32_t col, uint32_t ch) const final { return getRef(0, row, col, ch); }
+
+protected:
+ const T &getRef(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
{
const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
namespace nhwc
{
-template <typename T> class View final : public feature::Reader<T>
+template <typename T> class View final : public Reader<T>
{
public:
// Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+ View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
{
- UNUSED_RELEASE(len); // Workaround for unused variable in release mode
- assert(shape.N * shape.H * shape.W * shape.C * sizeof(T) == len);
-
- // No padding
- _strides.C = sizeof(T);
- _strides.W = shape.C * sizeof(T);
- _strides.H = shape.C * shape.W * sizeof(T);
- _strides.N = shape.C * shape.W * shape.H * sizeof(T);
+ // DO NOTHING
}
// Construct for backend tensor
- View(backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ View(backend::ITensor *tensor) : Reader<T>{tensor}
{
- assert(tensor->layout() == ir::Layout::NHWC);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.C = tensor->dimension(3);
- _shape.W = tensor->dimension(2);
- _shape.H = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ // DO NOTHING
}
public:
- T at(uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
-
- T &at(uint32_t row, uint32_t col, uint32_t ch)
- {
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
-
+ using Reader<T>::at;
T &at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch)
{
- const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
+ return const_cast<T &>(Reader<T>::getRef(batch, row, col, ch));
}
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
+ T &at(uint32_t row, uint32_t col, uint32_t ch)
{
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
- assert(1u * _shape.C > ch); // shape.C > ch
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += row * _strides.H;
- res += col * _strides.W;
- res += ch * _strides.C;
-
- return res;
+ return const_cast<T &>(Reader<T>::getRef(0, row, col, ch));
}
-
-private:
- // TODO Remove _shape
- ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
- Strides _strides;
- uint8_t *_ptr;
- size_t _len;
};
} // namespace nhwc
//
// Same list with Operations.lst
// Make comment out if operation is not supported in interpreter
-INTERP_OP(Add)
-INTERP_OP(Sub)
+INTERP_OP(BinaryArithmetic)
//INTERP_OP(BatchToSpaceND)
//INTERP_OP(Cast)
INTERP_OP(Conv2D)
INTERP_OP(DepthwiseConv2D)
-INTERP_OP(AvgPool2D)
-INTERP_OP(MaxPool2D)
+INTERP_OP(Pool2D)
INTERP_OP(Concat)
INTERP_OP(FullyConnected)
//INTERP_OP(Reduce)
INTERP_OP(Reshape)
-INTERP_OP(Mul)
INTERP_OP(Softmax)
//INTERP_OP(Squeeze)
//INTERP_OP(Slice)
//INTERP_OP(StridedSlice)
-INTERP_OP(Tanh)
-INTERP_OP(Logistic)
-//INTERP_OP(Div)
+INTERP_OP(ElementwiseActivation)
//INTERP_OP(Transpose)
//INTERP_OP(Exp)
//INTERP_OP(Comparison)
-//INTERP_OP(LogicalAnd)
-//INTERP_OP(LogicalOr)
//INTERP_OP(LogicalNot)
//INTERP_OP(LSTM)
//INTERP_OP(RSQRT)
-INTERP_OP(ReLU)
//INTERP_OP(ResizeBilinear)
-INTERP_OP(ReLU1)
-INTERP_OP(ReLU6)
//INTERP_OP(RNN)
//INTERP_OP(Floor)
//INTERP_OP(SpaceToBatchND)
//INTERP_OP(SpaceToDepth)
-//INTERP_OP(L2Pool2D)
//INTERP_OP(EmbeddingLookup)
//INTERP_OP(L2Normalization)
//INTERP_OP(HashtableLookup)
INTERP_OP(Pad)
//INTERP_OP(Custom)
//INTERP_OP(Permute)
-//INTERP_OP(Min)
-//INTERP_OP(Max)
//INTERP_OP(OneHot)
#include "OperationUtil.h"
#include "interp/Registration.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
-#include "ir/operation/Mul.h"
+#include "ir/operation/BinaryArithmetic.h"
#include "misc/polymorphic_downcast.h"
#include "cker/Types.h"
MUL
};
-template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node)
+void prepare(ExecEnv *env, const ir::Operation &node)
{
- const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
+ const auto &arithmetic_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
- const auto lhs_index = node.getInputs().at(add_node.LHS);
- const auto rhs_index = node.getInputs().at(add_node.RHS);
+ const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
+ const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
const auto out_index = node.getOutputs().at(0);
const auto lhs_tensor = env->tensorAt(lhs_index);
// TODO Util function to compare TensorInfo
if (lhs_tensor->data_type() != rhs_tensor->data_type())
{
- throw std::runtime_error{"Interp(Add): Different input types"};
+ throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"};
}
bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
rhs_tensor->tensorInfo().shape(), success);
if (!success)
{
- throw std::runtime_error{"Interp(Add): Fail to brodcasting"};
+      throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Fail to broadcast"};
}
auto output_info =
// TODO Util function to compare TensorInfo
if (lhs_tensor->data_type() != out_tensor->data_type())
{
- throw std::runtime_error{"Interp(Add): Invalid output type"};
+ throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"};
}
}
params->quantized_activation_max = max;
}
-template <typename raw_type, typename param_type, OpType op_type>
+template <typename raw_type, OpType op_type>
void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
- const param_type ¶m)
+ const ir::operation::BinaryArithmetic::Param ¶m)
{
const auto lhs_buffer = lhs_tensor->bufferRO();
const auto rhs_buffer = rhs_tensor->bufferRO();
out_shape, out_ptr);
}
-template <typename node_type, typename param_type, OpType op_type>
-void invokeAdd(const ExecEnv *env, const ir::Operation &node)
+template <OpType op_type>
+void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node)
{
- const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
-
- const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
- const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
+ const auto lhs_index = node.getInputs().at(node.LHS);
+ const auto rhs_index = node.getInputs().at(node.RHS);
const auto out_index = node.getOutputs().at(0);
const auto lhs_tensor = env->tensorAt(lhs_index);
const auto rhs_tensor = env->tensorAt(rhs_index);
if (data_type == ir::DataType::INT32)
{
- invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor,
- arithmetic_node.param());
+ invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
}
else if (data_type == ir::DataType::FLOAT32)
{
- invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param());
+ invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
}
else
{
throw std::runtime_error{"NYI: Unsupported data type"};
}
}
-} // namespace
-OpKernel *getAdd()
+void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
{
- static OpKernel kernel = {prepareAdd<ir::operation::Add>,
- invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>};
- return &kernel;
-}
+ const auto &arithmetic_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-OpKernel *getSub()
-{
- static OpKernel kernel = {prepareAdd<ir::operation::Sub>,
- invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>};
- return &kernel;
+ switch (arithmetic_node.param().arithmetic_type)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node);
+ break;
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node);
+ break;
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node);
+ break;
+ default:
+ throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " +
+ arithmetic_node.name()};
+ break;
+ }
}
-OpKernel *getMul()
+} // namespace
+
+OpKernel *getBinaryArithmetic()
{
- static OpKernel kernel = {prepareAdd<ir::operation::Mul>,
- invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>};
+ static OpKernel kernel = {prepare, invokeBinaryArithmeticOps};
return &kernel;
}
#include "interp/Registration.h"
-#include "ir/operation/ReLU.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
-#include "ir/operation/Tanh.h"
+#include "ir/operation/ElementwiseActivation.h"
+
+#include <misc/polymorphic_downcast.h>
+#include <cker/operation/Logistic.h>
+#include <cker/operation/Tanh.h>
namespace onert
{
enum class ActivationType
{
+ Logistic,
ReLU,
- ReLU1,
- ReLU6,
Tanh
};
// TODO Util function to compare TensorInfo
if (input_tensor->data_type() != output_tensor->data_type())
{
- throw std::runtime_error{"Interp(Activations): Invalid output type"};
+ throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"};
}
}
template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements)
+void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
+ float beta)
{
std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
switch (act_type)
{
case ActivationType::ReLU:
- fn = [](const float &in) { return std::max(0.f, in); };
- break;
- case ActivationType::ReLU1:
- fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); };
- break;
- case ActivationType::ReLU6:
- fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); };
+ fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); };
break;
case ActivationType::Tanh:
fn = [](const float &in) { return std::tanh(in); };
break;
default:
- throw std::runtime_error{"Interp(Activations): NYI - Unsupported activation"};
+ throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"};
break;
}
uint64_t elements = input_tensor->num_elements();
const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
float *out = reinterpret_cast<float *>(output_tensor->buffer());
-
- evalFloat<act_type>(input_start, out, elements);
+ if (act_type == ActivationType::Logistic)
+ {
+ const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
+ const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
+ nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out);
+ }
+ else
+ {
+ const auto &act_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
+ act_node.param().beta);
+ }
}
else
{
- throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"};
+ throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"};
}
}
-} // namespace
-
-OpKernel *getReLU()
+void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>};
- return &kernel;
-}
-
-OpKernel *getReLU1()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>};
- return &kernel;
+ const auto &act_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ switch (act_node.param().op_type)
+ {
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ invoke<ActivationType::Logistic>(env, node);
+ break;
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ invoke<ActivationType::ReLU>(env, node);
+ break;
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ invoke<ActivationType::Tanh>(env, node);
+ break;
+ default:
+ throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation");
+ }
}
-OpKernel *getReLU6()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>};
- return &kernel;
-}
+} // namespace
-OpKernel *getTanh()
+OpKernel *getElementwiseActivation()
{
- static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>};
+ static OpKernel kernel = {prepare, invokeElementwiseActivation};
return &kernel;
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Logistic.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Logistic.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareLogistic(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
-
- // Check shape and type lhs is same with rhs
- // TODO Util function to compare TensorInfo
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Logistic): Invalid output type"};
- }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *output_tensor)
-{
- const auto input_buffer = input_tensor->bufferRO();
- auto output_buffer = output_tensor->buffer();
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
- float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
- nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeLogistic(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, output_tensor);
- }
- else
- {
- throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"};
- }
-}
-} // namespace
-
-OpKernel *getLogistic()
-{
- static OpKernel kernel = {prepareLogistic, invokeLogistic};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/MaxPool.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/MaxPool2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
-
- assert(in_tensor->num_dimensions() == 4);
- UNUSED_RELEASE(in_tensor);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &maxpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
- const auto infered_output_shape =
- shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::MaxPool2D::Param ¶m)
-{
- // TODO support NCHW frontend
- const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
- // Calculate
- nnfw::cker::PoolParams cker_param;
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
- cker_param.filter_width = param.kw;
- cker_param.filter_height = param.kh;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
-
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &maxpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto data_type = in_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(in_tensor, out_tensor, maxpool_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace
-
-OpKernel *getMaxPool2D()
-{
- static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
*/
#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
#include "OperationUtil.h"
#include "interp/Registration.h"
-#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Pool2D.h"
#include "util/Utils.h"
#include "util/ShapeInference.h"
#include "misc/polymorphic_downcast.h"
{
namespace interp
{
-namespace avgpool2d
+namespace pool2d
{
-void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
+void preparePool2D(ExecEnv *env, const ir::Operation &node)
{
- const auto in_index = node.getInputs().at(0);
+ const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
+ const auto in_index = node.getInputs().at(pool_node.INPUT);
const auto out_index = node.getOutputs().at(0);
const auto in_tensor = env->tensorAt(in_index);
if (output_info.total_size() == 0)
{
// Handle unspecified output shape
- const auto &avgpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
const auto infered_output_shape =
- shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param());
+ shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
env->allocateIfNeeded(
out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
}
assert(out_tensor->num_dimensions() == 4);
}
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::AvgPool2D::Param ¶m)
+template <typename T>
+void invoke(const nnfw::cker::PoolParams ¶ms, const nnfw::cker::Shape &in_shape,
+ const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr,
+ ir::operation::Pool2D::PoolType op_type)
{
- // TODO Support NCHW frontend
+ switch (op_type)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
+ break;
+ case ir::operation::Pool2D::PoolType::MAX:
+ nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
+ break;
+ default:
+ throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"};
+ break;
+ }
+}
+
+void invokePool2DOps(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
+
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ // Look up the input/output tensors allocated during prepare
+ const auto in_tensor = env->tensorAt(in_index);
+ const auto out_tensor = env->tensorAt(out_index);
+
+ // TODO support NCHW frontend
const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto param = pool_node.param();
const auto padding =
ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
// Calculate
nnfw::cker::PoolParams cker_param;
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
cker_param.filter_width = param.kw;
cker_param.filter_height = param.kh;
cker_param.padding_values.width = padding.left;
cker_param.stride_width = param.stride.horizontal;
cker_param.stride_height = param.stride.vertical;
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &avgpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Check lhs shape is same with rhs (with broadcast)
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
const auto data_type = in_tensor->data_type();
if (data_type == ir::DataType::FLOAT32)
{
- invoke(in_tensor, out_tensor, avgpool_node.param());
+ calculateActivationRange(param.activation, &cker_param.float_activation_min,
+ &cker_param.float_activation_max);
+
+ const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
+ const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+ const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
+ float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
+ // Now, invoke() supports only Pool2D in float
+ invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type);
}
else
{
throw std::runtime_error{"NYI: Support float only"};
}
}
-} // namespace avgpool2d
+} // namespace pool2d
-OpKernel *getAvgPool2D()
+OpKernel *getPool2D()
{
- static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
+ static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps};
return &kernel;
}
namespace
{
-void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta,
- float *out)
-{
- assert(input_size > 0);
-
- // For each batch
- for (int b = 0; b < batch_size; b++)
- {
- // Find the max coeff.
- float max_coeff = in[0];
- for (int i = 1; i < input_size; i++)
- {
- if (in[i] > max_coeff)
- max_coeff = in[i];
- }
-
- // Compute the normalized sum of exps.
- float exp_sum = 0.0;
- for (int i = 0; i < input_size; i++)
- {
- out[i] = std::exp((in[i] - max_coeff) * beta);
- exp_sum += out[i];
- }
-
- // Divide by the sum of exps.
- float reciprocal_sum_exp = 1.f / exp_sum;
- for (int i = 0; i < input_size; i++)
- {
- out[i] *= reciprocal_sum_exp;
- }
-
- // Advance in and out pointers for the next batch.
- in += input_size;
- out += input_size;
- }
-}
-
void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
{
const auto in_index = node.getInputs().at(0);
uint32_t batch_size = in_tensor->dimension(0);
uint32_t input_size = in_tensor->dimension(1);
- Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr);
+ nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
}
else if (in_tensor->num_dimensions() == 4)
{
_operands.at(ind).data(std::move(data));
}
-void Graph::addInput(const OperandIndex &ind)
+void Graph::addInput(const OperandIndex &ind, const std::string &name)
{
assert(isBuildingPhase());
+ if (!name.empty())
+ _name_to_input.emplace(name, IOIndex{_inputs.size()});
_inputs.append(ind);
}
-void Graph::addOutput(const OperandIndex &ind)
+void Graph::addOutput(const OperandIndex &ind, const std::string &name)
{
assert(isBuildingPhase());
+ if (!name.empty())
+ _name_to_output.emplace(name, IOIndex{_outputs.size()});
_outputs.append(ind);
}
+IOIndex Graph::getInputIndex(const std::string &name) const
+{
+ auto itr = _name_to_input.find(name);
+ return (itr == _name_to_input.end()) ? IOIndex{} : itr->second;
+}
+
+IOIndex Graph::getOutputIndex(const std::string &name) const
+{
+ auto itr = _name_to_output.find(name);
+ return (itr == _name_to_output.end()) ? IOIndex{} : itr->second;
+}
+
void Graph::finishBuilding(void)
{
assert(isBuildingPhase());
#include "GraphIterator.h"
#include "ir/OperationIndexMap.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
namespace onert
{
+namespace compiler
+{
+class LoweredGraph;
+} // namespace compiler
+} // namespace onert
+
+namespace onert
+{
namespace ir
{
class Graph;
class Operation;
-class LoweredGraph;
class OpSequence;
template <bool is_const> class Iterator
using NodeRef = typename Iterator<is_const>::NodeRef;
using IterFn = typename Iterator<is_const>::IterFn;
using LoweredGraphRef =
- typename std::conditional<is_const, const LoweredGraph &, LoweredGraph &>::type;
+ typename std::conditional<is_const, const typename compiler::LoweredGraph &,
+ typename compiler::LoweredGraph &>::type;
using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type;
using OpSeqIndexRef = const OpSequenceIndex &;
using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>;
return ret;
}
-// TODO: Extract this into external helper function
-void OpSequences::dump(const std::string &msg, const Operations &operations) const
-{
- VERBOSE(OpSequences) << "OpSequences(" << msg << ")" << std::endl;
- iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
- VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
- });
-}
-
void OpSequences::removeFromOpSequence(const OperationIndex &operation_index)
{
const auto op_seq_index = findOperation(operation_index);
throw std::runtime_error("Operation not found");
}
+void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations)
+{
+ op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
+ VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
+ });
+}
+
} // namespace ir
} // namespace onert
using namespace operation;
-OperationDumper::OperationDumper(const std::string &start_msg)
+namespace
{
- VERBOSE(LIR) << start_msg << std::endl;
-}
-
-void OperationDumper::visit(const Abs &node)
+void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
{
- VERBOSE(LIR) << "* Abs" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Abs::Input::INPUT) << ")"
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Add &node)
+void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "")
{
- VERBOSE(LIR) << "* Add" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Add::Input::LHS) << ", "
- << node.getInputs().at(Add::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const ArgMax &node)
-{
- VERBOSE(LIR) << "* ArgMax" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMax::Input::INPUT) << ")"
- << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(1)
+ << ") " << adding_input << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const AvgPool2D &node)
+void dumpConvOp(const Operation &node, const std::string &padding_type)
{
- VERBOSE(LIR) << "* AvgPool2D(Implicit)" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(AvgPool2D::Input::INPUT) << ")"
- << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel("
+ << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias("
+ << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const BatchToSpaceND &node)
+void dumpPackingOp(const Operation &node)
{
- VERBOSE(LIR) << "* BatchToSpaceND" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BatchToSpaceND::Input::INPUT) << ")"
- << " BlockSize(" << node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE) << ")"
- << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ std::string inputs;
+ for (auto i : node.getInputs())
+ {
+ inputs += std::to_string(i.value()) + ",";
+ }
+ VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
+} // namespace
-void OperationDumper::visit(const operation::BroadcastTo &node)
+OperationDumper::OperationDumper(const std::string &start_msg)
{
- VERBOSE(LIR) << "* BroadcastTo" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BroadcastTo::Input::INPUT) << ", "
- << node.getInputs().at(BroadcastTo::Input::SHAPE) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ VERBOSE(LIR) << start_msg << std::endl;
}
-void OperationDumper::visit(const Cast &node)
-{
- VERBOSE(LIR) << "* Cast" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cast::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Comparison &node)
+void OperationDumper::visit(const BatchToSpaceND &node)
{
- VERBOSE(LIR) << "* Comparison" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Comparison::Input::INPUT0) << ", "
- << node.getInputs().at(Comparison::Input::INPUT1) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string block_size =
+ "BlockSize(" +
+ std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")";
+ dumpUnaryInputOp(node, block_size);
}
-void OperationDumper::visit(const Concat &node)
-{
- VERBOSE(LIR) << "* Concat" << std::endl;
- std::string inputs;
- for (auto i : node.getInputs())
- {
- inputs += std::to_string(i.value()) + ",";
- }
- VERBOSE(LIR) << " - Inputs : IFM(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); }
void OperationDumper::visit(const Conv2D &node)
{
std::string padding_type =
node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* Conv2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel("
- << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias("
- << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+ dumpConvOp(node, padding_type);
}
-void OperationDumper::visit(const ConvertFp16ToFp32 &node)
-{
- VERBOSE(LIR) << "* ConvertFp16ToFp32" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp16ToFp32::Input::INPUT)
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const ConvertFp32ToFp16 &node)
-{
- VERBOSE(LIR) << "* ConvertFp32ToFp16" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp32ToFp16::Input::INPUT)
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Cos &node)
-{
- VERBOSE(LIR) << "* Cos" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cos::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const DepthToSpace &node)
-{
- VERBOSE(LIR) << "* DepthToSpace" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(DepthToSpace::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const DepthwiseConv2D &node)
{
std::string padding_type =
node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* DepthwiseConv2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(DepthwiseConv2D::Input::INPUT)
- << ") Kernel(" << node.getInputs().at(DepthwiseConv2D::Input::KERNEL) << ") Bias("
- << node.getInputs().at(DepthwiseConv2D::Input::BIAS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+ dumpConvOp(node, padding_type);
}
-void OperationDumper::visit(const Dequantize &node)
+void OperationDumper::visit(const ElementwiseActivation &node)
{
- VERBOSE(LIR) << "* Dequantize" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Dequantize::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string params;
+ if (node.param().op_type == ElementwiseActivation::Type::RELU)
+ {
+ params = " lower value(" + std::to_string(node.param().alpha) + ") upper value(" +
+ std::to_string(node.param().beta) + ")";
+ }
+ else if (node.param().op_type == ElementwiseActivation::Type::LEAKY_RELU)
+ {
+ params = " alpha value(" + std::to_string(node.param().alpha) + ")";
+ }
+ dumpUnaryInputOp(node, params);
}
-void OperationDumper::visit(const Div &node)
-{
- VERBOSE(LIR) << "* Div" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Div::Input::LHS) << ", "
- << node.getInputs().at(Div::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const EmbeddingLookup &node)
{
- VERBOSE(LIR) << "* EmbeddingLookup" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : Lookups(" << node.getInputs().at(EmbeddingLookup::Input::LOOKUPS)
<< ") VALUES(" << node.getInputs().at(EmbeddingLookup::Input::VALUES) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Exp &node)
-{
- VERBOSE(LIR) << "* Exp" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Exp::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const ExpandDims &node)
{
- VERBOSE(LIR) << "* ExpandDims" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ExpandDims::Input::INPUT)
- << ") AXIS(" << node.getInputs().at(ExpandDims::Input::AXIS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Floor &node)
-{
- VERBOSE(LIR) << "* Floor" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Floor::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string axis =
+ "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
+ dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const FullyConnected &node)
{
- VERBOSE(LIR) << "* FullyConnected" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(FullyConnected::Input::INPUT)
- << ") Weight(" << node.getInputs().at(FullyConnected::Input::WEIGHT) << ") Bias("
- << node.getInputs().at(FullyConnected::Input::BIAS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string inputs =
+ "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
+ ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
+ dumpUnaryInputOp(node, inputs);
}
void OperationDumper::visit(const Gather &node)
{
- VERBOSE(LIR) << "* Gather" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Gather::Input::INPUT) << ") Indices("
- << node.getInputs().at(Gather::Input::INDICES) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string indices =
+ "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
+ dumpUnaryInputOp(node, indices);
}
void OperationDumper::visit(const HashtableLookup &node)
void OperationDumper::visit(const InstanceNorm &node)
{
- VERBOSE(LIR) << "* InstanceNorm" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(InstanceNorm::Input::INPUT)
- << ") Gamma(" << node.getInputs().at(InstanceNorm::Input::GAMMA) << ") Beta("
- << node.getInputs().at(InstanceNorm::Input::BETA) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const L2Normalization &node)
-{
- VERBOSE(LIR) << "* L2Normalization" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Normalization::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string inputs =
+ "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) +
+ ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
+ dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const L2Pool2D &node)
-{
- VERBOSE(LIR) << "* L2Pool2D" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Pool2D::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const LocalResponseNormalization &node)
-{
- VERBOSE(LIR) << "* LocalResponseNormalization" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input("
- << node.getInputs().at(LocalResponseNormalization::Input::INPUT) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const LSTM &node)
{
<< node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
}
-void OperationDumper::visit(const Log &node)
-{
- VERBOSE(LIR) << "* Log" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Log::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalAnd &node)
-{
- VERBOSE(LIR) << "* LogicalAnd" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalAnd::Input::INPUT0) << ", "
- << node.getInputs().at(LogicalAnd::Input::INPUT1) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalNot &node)
-{
- VERBOSE(LIR) << "* LogicalNot" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalNot::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalOr &node)
-{
- VERBOSE(LIR) << "* LogicalOr" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalOr::Input::INPUT0) << ", "
- << node.getInputs().at(LogicalOr::Input::INPUT1) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Logistic &node)
-{
- VERBOSE(LIR) << "* Logistic" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Logistic::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const MaxPool2D &node)
-{
- std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* MaxPool2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(MaxPool2D::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Mul &node)
-{
- VERBOSE(LIR) << "* Mul" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mul::Input::LHS) << ", "
- << node.getInputs().at(Mul::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Neg &node)
-{
- VERBOSE(LIR) << "* Neg" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Neg::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Pack &node)
-{
- VERBOSE(LIR) << "* Pack" << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
- VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); }
void OperationDumper::visit(const Pad &node)
{
- VERBOSE(LIR) << "* Pad" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pad::Input::INPUT) << ") Pad("
- << node.getInputs().at(Pad::Input::PAD) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string pad = "Pad(" + std::to_string(node.getInputs().at(Pad::Input::PAD).value()) + ")";
+ dumpUnaryInputOp(node, pad);
}
void OperationDumper::visit(const Permute &node)
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Pow &node)
+void OperationDumper::visit(const Pool2D &node)
{
- VERBOSE(LIR) << "* Pow" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pow::Input::LHS) << ", "
- << node.getInputs().at(Pow::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const PReLU &node)
-{
- VERBOSE(LIR) << "* PReLU" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(PReLU::Input::INPUT) << ") Alpha("
- << node.getInputs().at(PReLU::Input::ALPHA) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Reduce &node)
-{
- VERBOSE(LIR) << "* " + node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reduce::Input::INPUT) << ")"
+ std::string padding_type =
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")"
<< std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const ReLU &node)
-{
- VERBOSE(LIR) << "* ReLU" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); }
-void OperationDumper::visit(const ReLU1 &node)
+void OperationDumper::visit(const PReLU &node)
{
- VERBOSE(LIR) << "* ReLU1" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU1::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string alpha =
+ "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
+ dumpUnaryInputOp(node, alpha);
}
-void OperationDumper::visit(const ReLU6 &node)
-{
- VERBOSE(LIR) << "* ReLU6" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU6::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); }
+
+void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Reshape &node)
{
- VERBOSE(LIR) << "* Reshape" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reshape::Input::INPUT) << ")";
// optional param
- if (node.getInputs().size() == 2)
- {
- VERBOSE(LIR) << " Shape(" << node.getInputs().at(Reshape::Input::SHAPE) << ")";
- }
- else
- {
- VERBOSE(LIR) << " Shape(not provided)";
- }
- VERBOSE(LIR) << std::endl;
-
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string shape =
+ node.getInputs().size() == 2
+ ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
+ : "Shape(not provided)";
+ dumpUnaryInputOp(node, shape);
}
-void OperationDumper::visit(const ResizeBilinear &node)
-{
- VERBOSE(LIR) << "* ResizeBilinear" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ResizeBilinear::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Reverse &node)
{
- VERBOSE(LIR) << "* Reverse" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reverse::Input::INPUT) << ") Axis("
- << node.getInputs().at(Reverse::Input::AXIS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string axis =
+ "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
+ dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const RNN &node)
<< std::endl;
}
-void OperationDumper::visit(const Round &node)
-{
- VERBOSE(LIR) << "* Round" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Round::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const Range &node)
{
VERBOSE(LIR) << "* Range" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Range::Input::START) << ")"
+ VERBOSE(LIR) << " - Inputs : Start(" << node.getInputs().at(Range::Input::START) << ")"
<< " Limit(" << node.getInputs().at(Range::Input::LIMIT) << ")"
<< " Delta(" << node.getInputs().at(Range::Input::DELTA) << ")" << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const RSQRT &node)
-{
- VERBOSE(LIR) << "* RSQRT" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RSQRT::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const Select &node)
{
VERBOSE(LIR) << "* Select" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Select::Input::CONDITION) << ")"
+ VERBOSE(LIR) << " - Inputs : Condition(" << node.getInputs().at(Select::Input::CONDITION) << ")"
<< " Input_X(" << node.getInputs().at(Select::Input::INPUT_TRUE) << ")"
<< " Input_Y(" << node.getInputs().at(Select::Input::INPUT_FALSE) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const ir::operation::Shape &node)
-{
- VERBOSE(LIR) << "* Shape" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ir::operation::Shape::Input::INPUT)
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Sin &node)
-{
- VERBOSE(LIR) << "* Sin" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sin::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Softmax &node)
-{
- VERBOSE(LIR) << "* Softmax" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Softmax::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const SpaceToBatchND &node)
{
- VERBOSE(LIR) << "* SpaceToBatchND" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToBatchND::Input::INPUT)
- << ") BlockSize(" << node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE)
- << ") Paddings(" << node.getInputs().at(SpaceToBatchND::Input::PADDINGS) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string inputs =
+ "BlockSize(" +
+ std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
+ ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
+ ")";
+ dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const SpaceToDepth &node)
-{
- VERBOSE(LIR) << "* SpaceToDepth" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToDepth::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Split &node)
-{
- VERBOSE(LIR) << "* Split" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Split::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const SQRT &node)
-{
- VERBOSE(LIR) << "* SQRT" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SQRT::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const SquaredDifference &node)
-{
- VERBOSE(LIR) << "* SquaredDifference" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SquaredDifference::Input::LHS)
- << ", " << node.getInputs().at(SquaredDifference::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); }
void OperationDumper::visit(const StatelessRandomUniform &node)
{
VERBOSE(LIR) << "* StatelessRandomUniform" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
- << ", " << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Squeeze &node)
-{
- VERBOSE(LIR) << "* Squeeze" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Squeeze::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Slice &node)
-{
- VERBOSE(LIR) << "* Slice" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Slice::Input::INPUT) << ")"
+ VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
+ << ") Seed(" << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const StridedSlice &node)
-{
- VERBOSE(LIR) << "* StridedSlice" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StridedSlice::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Sub &node)
-{
- VERBOSE(LIR) << "* Sub" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sub::Input::LHS) << ", "
- << node.getInputs().at(Sub::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Tanh &node)
-{
- VERBOSE(LIR) << "* TanH" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tanh::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const StridedSlice &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Tile &node)
{
- VERBOSE(LIR) << "* Tile" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tile::Input::INPUT) << ", "
- << node.getInputs().at(Tile::Input::MULTIPLES) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string multiples =
+ "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
+ dumpUnaryInputOp(node, multiples);
}
void OperationDumper::visit(const TopKV2 &node)
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Transpose &node)
-{
- VERBOSE(LIR) << "* Transpose" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Transpose::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Unpack &node)
{
- VERBOSE(LIR) << "* Unpack" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")"
<< std::endl;
std::string outputs;
VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl;
}
-void OperationDumper::visit(const Min &node)
-{
- VERBOSE(LIR) << "* Min" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Min::Input::LHS) << ", "
- << node.getInputs().at(Min::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Max &node)
-{
- VERBOSE(LIR) << "* Max" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Max::Input::LHS) << ", "
- << node.getInputs().at(Max::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const OneHot &node)
{
- VERBOSE(LIR) << "* OneHot" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : "
<< "Indices(" << node.getInputs().at(OneHot::Input::INDICES) << ") " << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
void OperationDumper::visit(const If &node)
{
- VERBOSE(LIR) << "* If" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
std::string inputs;
const auto &input_indices = node.getInputs();
for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
void OperationDumper::visit(const While &node)
{
- VERBOSE(LIR) << "* While" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
std::string inputs;
const auto &input_indices = node.getInputs();
for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl;
}
-void OperationDumper::visit(const ZerosLike &node)
-{
- VERBOSE(LIR) << "* RoZerosLike" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ZerosLike::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
} // namespace ir
} // namespace onert
OperationDumper(const std::string &start_msg);
public:
- void visit(const operation::Abs &) override;
- void visit(const operation::Add &node) override;
void visit(const operation::ArgMax &) override;
- void visit(const operation::AvgPool2D &node) override;
void visit(const operation::BatchToSpaceND &node) override;
+ void visit(const operation::BinaryArithmetic &node) override;
void visit(const operation::BroadcastTo &) override;
- void visit(const operation::Cast &) override;
void visit(const operation::Comparison &) override;
void visit(const operation::Concat &node) override;
void visit(const operation::Conv2D &node) override;
void visit(const operation::ConvertFp16ToFp32 &node) override;
void visit(const operation::ConvertFp32ToFp16 &node) override;
- void visit(const operation::Cos &node) override;
void visit(const operation::DepthToSpace &) override;
void visit(const operation::DepthwiseConv2D &node) override;
- void visit(const operation::Dequantize &) override;
- void visit(const operation::Div &) override;
+ void visit(const operation::ElementwiseActivation &) override;
+ void visit(const operation::ElementwiseBinary &) override;
+ void visit(const operation::ElementwiseUnary &) override;
void visit(const operation::EmbeddingLookup &) override;
- void visit(const operation::Exp &) override;
void visit(const operation::ExpandDims &) override;
- void visit(const operation::Floor &) override;
void visit(const operation::FullyConnected &node) override;
void visit(const operation::Gather &) override;
void visit(const operation::HashtableLookup &) override;
void visit(const operation::InstanceNorm &) override;
void visit(const operation::L2Normalization &) override;
- void visit(const operation::L2Pool2D &) override;
void visit(const operation::LocalResponseNormalization &) override;
- void visit(const operation::Log &) override;
- void visit(const operation::LogicalAnd &) override;
- void visit(const operation::LogicalNot &) override;
- void visit(const operation::LogicalOr &) override;
- void visit(const operation::Logistic &) override;
void visit(const operation::LSTM &) override;
- void visit(const operation::MaxPool2D &node) override;
- void visit(const operation::Mul &) override;
- void visit(const operation::Neg &) override;
void visit(const operation::Pack &) override;
void visit(const operation::Pad &) override;
void visit(const operation::Permute &node) override;
+ void visit(const operation::Pool2D &node) override;
void visit(const operation::Pow &node) override;
void visit(const operation::PReLU &) override;
void visit(const operation::Range &) override;
+ void visit(const operation::Rank &) override;
void visit(const operation::Reduce &) override;
- void visit(const operation::ReLU &) override;
- void visit(const operation::ReLU1 &) override;
- void visit(const operation::ReLU6 &) override;
void visit(const operation::Reshape &node) override;
void visit(const operation::ResizeBilinear &) override;
void visit(const operation::Reverse &) override;
void visit(const operation::RNN &) override;
- void visit(const operation::Round &) override;
- void visit(const operation::RSQRT &) override;
void visit(const operation::Select &node) override;
void visit(const operation::Shape &node) override;
- void visit(const operation::Sin &node) override;
void visit(const operation::Softmax &node) override;
void visit(const operation::SpaceToBatchND &) override;
void visit(const operation::SpaceToDepth &) override;
void visit(const operation::Split &) override;
- void visit(const operation::SQRT &) override;
void visit(const operation::SquaredDifference &) override;
void visit(const operation::Squeeze &) override;
void visit(const operation::Slice &) override;
void visit(const operation::StridedSlice &) override;
void visit(const operation::StatelessRandomUniform &) override;
- void visit(const operation::Sub &) override;
- void visit(const operation::Tanh &) override;
void visit(const operation::Tile &) override;
void visit(const operation::TopKV2 &) override;
void visit(const operation::TransposeConv &) override;
void visit(const operation::Transpose &) override;
void visit(const operation::Unpack &) override;
- void visit(const operation::Min &) override;
- void visit(const operation::Max &) override;
void visit(const operation::OneHot &) override;
void visit(const operation::If &) override;
void visit(const operation::While &) override;
- void visit(const operation::ZerosLike &) override;
};
} // namespace ir
}
inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const Stride &stride,
- uint32_t kw, uint32_t kh)
+ uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf)
{
ExplicitPadding padding;
// padding_to_beginning = total_padding / 2
// padding_to_end = (total_padding + 1)/2.
//
+ const int32_t effective_filter_h_size = (kh - 1) * dhf + 1;
+ const int32_t effective_filter_w_size = (kw - 1) * dwf + 1;
+
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
(ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
- const int32_t vertical_needed_input = (vertical_expected_output - 1) * stride.vertical + kh;
+ const int32_t vertical_needed_input =
+ (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
- const int32_t horizontal_needed_input = (horizontal_expected_output - 1) * stride.horizontal + kw;
+ const int32_t horizontal_needed_input =
+ (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
padding.top = vertical_total_padding / 2;
}
inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureShape &ofm_shape,
- const Stride &stride, uint32_t kw, uint32_t kh)
+ const Stride &stride, uint32_t kw, uint32_t kh, uint32_t dwf,
+ uint32_t dhf)
{
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
UNUSED_RELEASE(vertical_expected_output);
UNUSED_RELEASE(horizontal_expected_output);
- return samePaddingUsingIFM(ifm_shape, stride, kw, kh);
+ return samePaddingUsingIFM(ifm_shape, stride, kw, kh, dwf, dhf);
}
} // namespace
const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
const FeatureShape &ofm_shape, const Stride &stride,
- uint32_t kw, uint32_t kh)
+ uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf)
{
if (padding.type == PaddingType::EXPLICIT)
{
}
else if (padding.type == PaddingType::SAME)
{
- return samePadding(ifm_shape, ofm_shape, stride, kw, kh);
+ return samePadding(ifm_shape, ofm_shape, stride, kw, kh, dwf, dhf);
}
else if (padding.type == PaddingType::VALID)
{
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Abs.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Abs::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Abs::Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/AvgPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void AvgPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-AvgPool2D::AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
#include <cassert>
+#include <unordered_map>
#include "ir/OperationVisitor.h"
namespace operation
{
-void Add::accept(OperationVisitor &v) const { v.visit(*this); }
+void BinaryArithmetic::accept(OperationVisitor &v) const { v.visit(*this); }
-Add::Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
+BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
: Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
+std::string BinaryArithmetic::name() const
+{
+ using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType;
+ static const std::unordered_map<ArithmeticType, std::string> name_map{
+ {ArithmeticType::ADD, std::string{"Add"}},
+ {ArithmeticType::SUB, std::string{"Sub"}},
+ {ArithmeticType::MUL, std::string{"Mul"}},
+ {ArithmeticType::DIV, std::string{"Div"}}};
+ return name_map.at(_param.arithmetic_type);
+}
+
} // namespace operation
} // namespace ir
} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Cast.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Cast::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Cast::Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Dequantize.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Dequantize::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Dequantize::Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Div.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Div::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Div::Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseActivation.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+ if (param.op_type == Type::LOGISTIC)
+ {
+ assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic will be supported only as "
+ "sigmoid function(L=1, k=1, x0=0). So, do "
+ "not use alpha and beta");
+ }
+ else if (param.op_type == Type::RELU)
+ {
+ assert(param.alpha >= param.beta && "ReLU's alpha must be equal or greater than beta");
+ }
+ else if (param.op_type == Type::TANH)
+ {
+ assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x), Tanh is "
+ "supported only the values of alpha and "
+ "beta are 1.f");
+ }
+}
+
+std::string ElementwiseActivation::name() const
+{
+ using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
+ static const std::unordered_map<Type, std::string> name_map{
+ {ElementwiseActivationType::ELU, "ELU"},
+ {ElementwiseActivationType::LOGISTIC, "Logistic"},
+ {ElementwiseActivationType::RELU, "ReLU"},
+ {ElementwiseActivationType::TANH, "Tanh"},
+ {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+ return name_map.at(_param.op_type);
+}
+
+float ElementwiseActivation::infinity = std::numeric_limits<float>::infinity();
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseBinary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseBinary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseBinary::name() const
+{
+ using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
+ static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
+ {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+ {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+ {ElementwiseBinaryType::MAX, std::string{"Max"}},
+ {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseUnary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseUnary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseUnary::name() const
+{
+ using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type;
+ static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{
+ {ElementwiseUnaryType::ABS, std::string{"Abs"}},
+ {ElementwiseUnaryType::CAST, std::string{"Cast"}},
+ {ElementwiseUnaryType::COS, std::string{"Cos"}},
+ {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
+ {ElementwiseUnaryType::ERF, std::string{"Erf"}},
+ {ElementwiseUnaryType::EXP, std::string{"Exp"}},
+ {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
+ {ElementwiseUnaryType::LOG, std::string{"Log"}},
+ {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
+ {ElementwiseUnaryType::NEG, std::string{"Neg"}},
+ {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
+ {ElementwiseUnaryType::ROUND, std::string{"Round"}},
+ {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
+ {ElementwiseUnaryType::SIN, std::string{"Sin"}},
+ {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
+ {ElementwiseUnaryType::SQURE, std::string{"Squre"}},
+ {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Exp.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Exp::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Exp::Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Floor.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Floor::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Floor::Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/L2Pool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void L2Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-L2Pool2D::L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalAnd.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalAnd::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalAnd::LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalNot.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalNot::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalNot::LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalOr.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalOr::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalOr::LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Logistic.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Logistic::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Logistic::Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Max.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Max::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Max::Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/MaxPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void MaxPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-MaxPool2D::MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Min.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Min::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Min::Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Mul.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Mul::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Mul::Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Neg.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Neg::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Neg::Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
void Pad::accept(OperationVisitor &v) const { v.visit(*this); }
+// PAD: 2 inputs
+// PADV2: 3 inputs
Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Pool2D.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+std::string Pool2D::name() const
+{
+ using PoolType = onert::ir::operation::Pool2D::PoolType;
+ static const std::unordered_map<PoolType, std::string> name_map{
+ {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
+ {PoolType::L2, "L2" + std::string{toString(opcode())}},
+ {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/RSQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void RSQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-RSQRT::RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
* limitations under the License.
*/
-#include "ir/operation/Cos.h"
+#include "ir/operation/Rank.h"
#include <cassert>
namespace operation
{
-void Cos::accept(OperationVisitor &v) const { v.visit(*this); }
+void Rank::accept(OperationVisitor &v) const { v.visit(*this); }
-Cos::Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
: Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU::ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU1.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU1::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU1::ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU6.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU6::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU6::ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
* limitations under the License.
*/
-#include "ir/operation/Round.h"
+#include "ir/operation/ResizeNearestNeighbor.h"
#include <cassert>
namespace operation
{
-void Round::accept(OperationVisitor &v) const { v.visit(*this); }
+void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this); }
-Round::Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs,
+ const Param ¶m)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/SQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void SQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-SQRT::SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Sub.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Sub::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Sub::Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Tanh.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Tanh::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Tanh::Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ZerosLike.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ZerosLike::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ZerosLike::ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-
-#include "ir/OperationVisitor.h"
-#include "LoweredOperationPass.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace pass
-{
-
-class PermutationOperationPass : public LoweredOperationPass, public OperationVisitor
-{
-public:
- using LoweredOperationPass::LoweredOperationPass;
-
-public:
- std::string id() final { return "PermutationOperationPass"; }
-
-public:
- void callback(const OperationIndex &i, Operation &n) final;
-
-public:
- void visit(const operation::Add &) final;
- void visit(const operation::Comparison &) final;
- void visit(const operation::Concat &) final;
- void visit(const operation::Div &) final;
- void visit(const operation::LogicalAnd &) final;
- void visit(const operation::LogicalNot &) final;
- void visit(const operation::LogicalOr &) final;
- void visit(const operation::Max &) final;
- void visit(const operation::Min &) final;
- void visit(const operation::Mul &) final;
- void visit(const operation::Pack &) final;
- void visit(const operation::PReLU &) final;
- void visit(const operation::SquaredDifference &) final;
- void visit(const operation::Sub &) final;
- void visit(const operation::Unpack &) final;
- void visit(const operation::FullyConnected &) final;
- void visit(const operation::Gather &) final;
- void visit(const operation::Reshape &) final;
-
-private:
- void applyExpandRanks(const Operation &);
- void changeToKeepLayout(const Operation &);
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
#include <unordered_map>
#include <json/json.h>
#include <assert.h>
+#include <utility>
+#include <map>
+#include <set>
+#include <stdint.h>
+// json type for Chrome Event Trace
namespace
{
} // namespace
+// md table type
+namespace
+{
+
+void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
+{
+ os << "| ";
+ for (auto &key : list)
+ {
+ os << key << " | ";
+ }
+ os << "\n";
+}
+
+struct MDContent
+{
+ std::string name;
+ uint64_t begin_ts;
+ uint64_t end_ts;
+ uint32_t min_rss;
+ uint32_t max_rss;
+ uint32_t min_page_reclaims;
+ uint32_t max_page_reclaims;
+
+ MDContent()
+ : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
+ max_page_reclaims(0)
+ {
+ // DO NOTHING
+ }
+
+ virtual ~MDContent() = default;
+
+ void updateRss(uint32_t rss)
+ {
+ if (min_rss == UINT32_MAX)
+ min_rss = rss;
+ if (max_rss == 0)
+ max_rss = rss;
+
+ if (min_rss > rss)
+ min_rss = rss;
+ else if (max_rss < rss)
+ max_rss = rss;
+ }
+
+ void updateMinflt(uint32_t minflt)
+ {
+ if (min_page_reclaims == UINT32_MAX)
+ min_page_reclaims = minflt;
+ if (max_page_reclaims == 0)
+ max_page_reclaims = minflt;
+
+ if (min_page_reclaims > minflt)
+ min_page_reclaims = minflt;
+ else if (max_page_reclaims < minflt)
+ max_page_reclaims = minflt;
+ }
+
+ virtual void write(std::ostream &os) const = 0;
+};
+
+struct OpSeq : public MDContent
+{
+ std::string backend;
+ uint64_t graph_latency;
+
+ struct OpSeqCmp
+ {
+ bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ };
+
+ void write(std::ostream &os) const override
+ {
+ uint64_t opseq_latency = end_ts - begin_ts;
+ double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
+ writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
+ std::to_string(min_rss), std::to_string(max_rss),
+ std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
+ }
+};
+
+struct Graph : public MDContent
+{
+ std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
+
+ void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
+ {
+ uint64_t graph_latency = end_ts - begin_ts;
+ for (auto it : name_to_opseq)
+ {
+ auto opseq = it.second;
+ opseq.graph_latency = graph_latency;
+
+ opseqs.insert(opseq);
+
+ updateRss(opseq.min_rss);
+ updateRss(opseq.max_rss);
+ updateMinflt(opseq.min_page_reclaims);
+ updateMinflt(opseq.max_page_reclaims);
+ }
+ }
+
+ void write(std::ostream &os) const override
+ {
+ static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
+ "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
+ "-----------------", "-----------------"};
+
+ // Graph's Header
+ writeMDTableRow(os, graph_headers);
+ writeMDTableRow(os, graph_headers_line);
+
+ // Graph's contents
+ writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
+ std::to_string(max_rss), std::to_string(min_page_reclaims),
+ std::to_string(max_page_reclaims)});
+
+ os << "\n";
+
+ static std::vector<std::string> opseq_headers{
+ "OpSeq name", "backend", "latency(us)", "latency(%)",
+ "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> opseq_headers_line{
+ "----------", "-------", "-----------", "-----------",
+ "-------", "-------", "-----------------", "-----------------"};
+
+ os << "## OpSequences \n";
+
+ // OpSeq's Header
+ writeMDTableRow(os, opseq_headers);
+ writeMDTableRow(os, opseq_headers_line);
+
+ // OpSeq's contents
+ for (auto opseq : opseqs)
+ {
+ opseq.write(os);
+ }
+
+ os << "\n";
+ }
+};
+
+struct MDTableBuilder
+{
+ MDTableBuilder(const std::vector<DurationEvent> &duration_events,
+ const std::vector<CounterEvent> &counter_events)
+ : _duration_events(duration_events), _counter_events(counter_events)
+ {
+ for (const auto &evt : _counter_events)
+ {
+ uint64_t ts = std::stoull(evt.ts);
+ auto &name = evt.name;
+ assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
+ assert(evt.values.size() == 1);
+ auto &val = evt.values.begin()->second;
+ if (_ts_to_values.find(ts) == _ts_to_values.end())
+ {
+ std::pair<uint32_t, uint32_t> values;
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ _ts_to_values.insert({ts, values});
+ }
+ else
+ {
+ auto &values = _ts_to_values.at(ts);
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ }
+ }
+ }
+
+ MDTableBuilder &build()
+ {
+ for (auto &it : divideGraph())
+ {
+ size_t begin_idx = it.first;
+ size_t end_idx = it.second;
+ std::map<std::string, OpSeq> name_to_opseq;
+ for (size_t i = begin_idx + 1; i < end_idx; ++i)
+ {
+ const auto &evt = _duration_events[i];
+ assert(evt.name.compare("Graph") != 0);
+ assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
+ if (evt.ph.compare("B") == 0)
+ {
+ assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
+ name_to_opseq.insert({evt.name, makeOpSeq(evt)});
+ }
+ else
+ {
+ assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
+ auto &opseq = name_to_opseq.at(evt.name);
+ updateOpSeq(opseq, evt);
+ }
+ }
+
+ _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
+ }
+
+ return *this;
+ }
+
+ std::vector<std::pair<size_t, size_t>> divideGraph()
+ {
+ std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
+ for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
+ {
+ const auto &evt = _duration_events.at(i);
+ if (evt.name.compare("Graph") == 0)
+ {
+ if (evt.ph.compare("B") == 0)
+ begin_idx = i;
+ else
+ graph_idx_list.emplace_back(begin_idx, i);
+ }
+ }
+ return graph_idx_list;
+ }
+
+ OpSeq makeOpSeq(const DurationEvent &evt)
+ {
+ OpSeq opseq;
+ opseq.name = evt.name;
+ opseq.begin_ts = std::stoull(evt.ts);
+ opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
+ opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
+ opseq.backend = evt.tid;
+ return opseq;
+ }
+
+ void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
+ {
+ opseq.end_ts = std::stoull(evt.ts);
+ opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
+ opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
+ }
+
+ Graph makeGraph(size_t begin_idx, size_t end_idx,
+ const std::map<std::string, OpSeq> &name_to_opseq)
+ {
+ Graph graph;
+ graph.name = "Graph";
+ graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
+ graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
+ graph.end_ts = std::stoull(_duration_events[end_idx].ts);
+ graph.updateRss(_ts_to_values.at(graph.end_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
+ graph.setOpSeqs(name_to_opseq);
+ return graph;
+ }
+
+ void write(std::ostream &os)
+ {
+ // Write contents
+ for (size_t i = 0; i < _graphs.size(); ++i)
+ {
+ os << "# Graph " << i << "\n";
+ _graphs.at(i).write(os);
+ }
+ }
+
+ const std::vector<DurationEvent> &_duration_events;
+ const std::vector<CounterEvent> &_counter_events;
+ // timestamp to std::pair<maxrss, minflt>
+ std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
+ std::vector<Graph> _graphs;
+};
+
+} // namespace
+
void EventRecorder::emit(const DurationEvent &evt)
{
std::lock_guard<std::mutex> lock{_mu};
case WriteFormat::SNPE_BENCHMARK:
writeSNPEBenchmark(os);
break;
+ case WriteFormat::MD_TABLE:
+ writeMDTable(os);
+ break;
default:
assert(!"Invalid value");
break;
os << " ]\n";
os << "}\n";
}
+
+void EventRecorder::writeMDTable(std::ostream &os)
+{
+ MDTableBuilder(_duration_events, _counter_events).build().write(os);
+}
enum class WriteFormat
{
CHROME_TRACING,
- SNPE_BENCHMARK
+ SNPE_BENCHMARK,
+ MD_TABLE,
};
public:
private:
void writeSNPEBenchmark(std::ostream &os);
void writeChromeTrace(std::ostream &os);
+ void writeMDTable(std::ostream &os);
private:
std::mutex _mu;
#include "util/Utils.h"
#include "ir/InternalType.h"
#include "ir/Shape.h"
-#include "ir/operation/AvgPool2D.h"
-#include "ir/operation/MaxPool2D.h"
#include "util/ShapeInference.h"
#include "util/logging.h"
// Calculate output height and width of convolution-like operation
std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, const int ker_h,
const int ker_w, const ir::Padding pad,
- const ir::Stride stride)
+ const ir::Stride stride,
+ const ir::Dilation dilation = {1, 1})
{
int32_t out_h = 0, out_w = 0;
-
+ int32_t effective_filter_w_size = (ker_w - 1) * dilation.width_factor + 1;
+ int32_t effective_filter_h_size = (ker_h - 1) * dilation.height_factor + 1;
switch (pad.type)
{
case ir::PaddingType::SAME:
out_w = ceil_div(in_w, stride.horizontal);
break;
case ir::PaddingType::VALID:
- out_h = ceil_div(in_h - ker_h + 1, stride.vertical);
- out_w = ceil_div(in_w - ker_w + 1, stride.horizontal);
+ out_h = ceil_div(in_h - effective_filter_h_size + 1, stride.vertical);
+ out_w = ceil_div(in_w - effective_filter_w_size + 1, stride.horizontal);
break;
case ir::PaddingType::EXPLICIT:
- out_h = (in_h + pad.param.top + pad.param.bottom - ker_h) / stride.vertical + 1;
- out_w = (in_w + pad.param.left + pad.param.right - ker_w) / stride.horizontal + 1;
+ out_h =
+ (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
+ out_w =
+ (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal +
+ 1;
break;
default:
assert(false);
return out_shape;
}
-ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param ¶m,
- const ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
- param.padding, param.stride);
- // Pooling don't change number of channels and batch size
- return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
-}
-
ir::Shape inferReduceShape(const ir::Shape &input_shape, const std::vector<int> &axes,
bool keep_dims)
{
assert(ifm_shape.C == kf_shape.C);
const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
- param.padding, param.stride);
+ param.padding, param.stride, param.dilation);
return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.N};
}
return out_shape;
}
-ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param ¶m,
- const ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
- param.padding, param.stride);
- // Pooling don't change number of channels and batch size
- return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
-}
-
ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis)
{
assert(depth >= 0);
return ret;
}
+ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param ¶m,
+ const ir::Layout layout)
+{
+ assert(layout == ir::Layout::NHWC);
+ auto ifm_shape = in_shape.asFeature(layout);
+ const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
+ param.padding, param.stride);
+ // Pooling don't change number of channels and batch size
+ return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
+}
+
ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height,
const int32_t output_width)
{
template <typename Param, typename OptionsType>
void loadStridesAndPaddings(Param ¶m, const OptionsType *options);
// Load Pool2D param
- template <typename Param> void loadPool2D(Param ¶m, const Pool2DOptions *options);
+ template <typename Param> void loadPool2DOptions(Param ¶m, const Pool2DOptions *options);
// Operations
void loadConv2D(const Operator *op, ir::Graph &subg);
void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
void loadTransposeConv(const Operator *op, ir::Graph &subg);
- void loadAvgPool2D(const Operator *op, ir::Graph &subg);
+ void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
void loadReshape(const Operator *op, ir::Graph &subg);
void loadSoftmax(const Operator *op, ir::Graph &subg);
- void loadMaxPool2D(const Operator *op, ir::Graph &subg);
void loadConcatenation(const Operator *op, ir::Graph &subg);
void loadFill(const Operator *op, ir::Graph &subg);
void loadFC(const Operator *op, ir::Graph &subg);
- void loadAdd(const Operator *op, ir::Graph &subg);
- void loadSub(const Operator *op, ir::Graph &subg);
- void loadMul(const Operator *op, ir::Graph &subg);
- void loadDiv(const Operator *op, ir::Graph &subg);
+ template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
+ void loadBinaryArithmetic(const Operator *op, ir::Graph &subg);
+ void loadAddV2(const Operator *op, ir::Graph &subg);
void loadPack(const Operator *op, ir::Graph &subg);
- void loadRelu(const Operator *op, ir::Graph &subg);
- void loadRelu6(const Operator *op, ir::Graph &subg);
void loadResizeBilinear(const Operator *op, ir::Graph &subg);
- void loadRsqrt(const Operator *op, ir::Graph &subg);
+ void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
void loadSelect(const Operator *op, ir::Graph &subg);
- void loadSqrt(const Operator *op, ir::Graph &subg);
void loadSquaredDifference(const Operator *op, ir::Graph &subg);
- void loadTanh(const Operator *op, ir::Graph &subg);
void loadTranspose(const Operator *op, ir::Graph &subg);
- void loadReduce(const Operator *op, ir::Graph &subg,
- ir::operation::Reduce::ReduceType reduce_type);
+ template <ir::operation::Reduce::ReduceType reduce_type>
+ void loadReduce(const Operator *op, ir::Graph &subg);
void loadReduceAll(const Operator *op, ir::Graph &subg);
void loadReverseV2(const Operator *op, ir::Graph &subg);
void loadPad(const Operator *op, ir::Graph &subg);
- void loadLogistic(const Operator *op, ir::Graph &subg);
- void loadExp(const Operator *op, ir::Graph &subg);
+ void loadElementwiseActivation(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseActivation::Type op_type,
+ float alpha = 0.f, float beta = 0.f);
+ template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
+ void loadElementwiseBinary(const Operator *op, ir::Graph &subg);
+ void loadElementwiseUnary(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseUnary::Type op_type);
void loadExpandDims(const Operator *op, ir::Graph &subg);
void loadGather(const Operator *op, ir::Graph &subg);
void loadCustom(const Operator *op, ir::Graph &subg);
void loadSlice(const Operator *op, ir::Graph &subg);
void loadStridedSlice(const Operator *op, ir::Graph &subg);
void loadUnpack(const Operator *op, ir::Graph &subg);
- void loadMinimum(const Operator *op, ir::Graph &subg);
- void loadMaximum(const Operator *op, ir::Graph &subg);
- void loadCast(const Operator *op, ir::Graph &subg);
void loadComparison(const Operator *op, ir::Graph &subg);
void loadEinsum(const Operator *op, ir::Graph &subg);
void loadOneHot(const Operator *op, ir::Graph &subg);
- void loadAbs(const Operator *op, ir::Graph &subg);
- void loadCos(const Operator *op, ir::Graph &subg);
- void loadSin(const Operator *op, ir::Graph &subg);
void loadShape(const Operator *op, ir::Graph &subg);
void loadIf(const Operator *op, ir::Graph &subg);
void loadWhile(const Operator *op, ir::Graph &subg);
- void loadNeg(const Operator *op, ir::Graph &subg);
- void loadLog(const Operator *op, ir::Graph &subg);
void loadArgMax(const Operator *op, ir::Graph &subg);
- void loadRound(const Operator *op, ir::Graph &subg);
void loadPow(const Operator *op, ir::Graph &subg);
- void loadLogicalNot(const Operator *op, ir::Graph &subg);
- void loadZerosLike(const Operator *op, ir::Graph &subg);
void loadTile(const Operator *op, ir::Graph &subg);
- void loadLogicalOr(const Operator *op, ir::Graph &subg);
void loadRange(const Operator *op, ir::Graph &subg);
+ void loadRank(const Operator *op, ir::Graph &subg);
void loadMatrixBandPart(const Operator *op, ir::Graph &subg);
void loadBroadcastTo(const Operator *op, ir::Graph &subg);
void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
void loadLogSoftmax(const Operator *op, ir::Graph &subg);
- void loadQuantize(const Operator *op, ir::Graph &subg);
void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg);
+ void loadL2Normalization(const Operator *op, ir::Graph &subg);
+ void loadLeakyRelu(const Operator *op, ir::Graph &subg);
protected:
// Base address for mapped region for loading (if needed)
const Model *_model;
// Maps Tensor indices to onert Operands.
std::vector<ir::OperandIndex> _tensor_to_operand;
+ std::unordered_map<ir::OperandIndex, std::string> _tensor_names;
// Verifier
std::unique_ptr<Verifier> _verifier;
};
subg.setOperandValue(operand_index, std::move(data_obj));
}
- // Name unused
- // auto name = tensor->name();
+ _tensor_names.emplace(operand_index, tensor->name()->str());
+
// Variablie
if (tensor->is_variable())
throw std::runtime_error("Variable tensor not supported!");
template <typename LoaderDomain, typename SpecificLoader>
template <typename Param>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(Param ¶m,
- const Pool2DOptions *options)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2DOptions(Param ¶m,
+ const Pool2DOptions *options)
{
// Strides and Paddings
loadStridesAndPaddings(param, options);
const auto *options = op->builtin_options_as_Conv2DOptions();
param.activation = convertActivation(options->fused_activation_function());
loadStridesAndPaddings(param, options);
- // Dilation h/w factor unused
+
+ param.dilation.width_factor = options->dilation_w_factor();
+ param.dilation.height_factor = options->dilation_h_factor();
+
std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAvgPool2D(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(const Operator *op, ir::Graph &subg,
+ ir::operation::Pool2D::PoolType op_type)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::AvgPool2D::Param param;
+ ir::operation::Pool2D::Param param;
+ param.op_type = op_type;
const auto *options = op->builtin_options_as_Pool2DOptions();
- loadPool2D(param, options);
+ loadPool2DOptions(param, options);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::AvgPool2D(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Pool2D(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaxPool2D(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::MaxPool2D::Param param;
- const auto *options = op->builtin_options_as_Pool2DOptions();
-
- loadPool2D(param, options);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::MaxPool2D(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op,
ir::Graph &subg)
{
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAdd(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Add::Param param;
- const auto *options = op->builtin_options_as_AddOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Add(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSub(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Sub::Param param;
- const auto *options = op->builtin_options_as_SubOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Sub(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMul(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::Mul::Param param;
- const auto *options = op->builtin_options_as_MulOptions();
+ ir::operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = convertActivation(options->fused_activation_function());
+ if (op->custom_options() == nullptr)
+ {
+ param.activation = ir::Activation::NONE;
+ }
+ else
+ {
+ size_t custom_op_data_size = op->custom_options()->size();
+ auto custom_op_data = op->custom_options()->Data();
+ auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
+ auto attr_map = data_root.AsMap();
+ const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>(
+ attr_map["fused_activation_function"].AsInt8());
+ param.activation = convertActivation(fused_activation_func);
+ }
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Mul(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::BinaryArithmetic(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadDiv(const Operator *op, ir::Graph &subg)
+template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::Div::Param param;
- const auto *options = op->builtin_options_as_DivOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
+ ir::operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = op_type;
+ switch (op_type)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ const auto *add_options = op->builtin_options_as_AddOptions();
+ param.activation = convertActivation(add_options->fused_activation_function());
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ const auto *sub_options = op->builtin_options_as_SubOptions();
+ param.activation = convertActivation(sub_options->fused_activation_function());
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ const auto *mul_options = op->builtin_options_as_MulOptions();
+ param.activation = convertActivation(mul_options->fused_activation_function());
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ const auto *div_options = op->builtin_options_as_DivOptions();
+ param.activation = convertActivation(div_options->fused_activation_function());
+ break;
+ }
+ default:
+ assert(false &&
+ "The function 'loadBinaryArithmetic' supports only BinaryArithmetic operations");
+ break;
+ }
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Div(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::BinaryArithmetic(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseActivation(
+ const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
+ float alpha, float beta)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
+ ir::operation::ElementwiseActivation::Param param;
+ param.op_type = op_type;
+ param.alpha = alpha;
+ param.beta = beta;
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu6(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU6(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ElementwiseActivation(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRsqrt(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeNearestNeighbor(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
+ auto input = inputs.at(0);
+ auto size = inputs.at(1);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::RSQRT(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
+ if (!subg.operands().at(size).isConstant())
+ throw std::runtime_error("ResizeNearestNeighbor: non-constant 'size' is not supported.");
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>();
- loadOperationIO(op, inputs, outputs);
+ ir::operation::ResizeNearestNeighbor::Param param;
+ param.height_out = size_v[0];
+ param.width_out = size_v[1];
+ param.align_corners = op->builtin_options_as_ResizeNearestNeighborOptions()->align_corners();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ResizeNearestNeighbor({input}, outputs, param));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSqrt(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SQRT(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
subg.addOperation(std::move(new_op));
}
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTanh(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Tanh(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(
- const Operator *op, ir::Graph &subg, ir::operation::Reduce::ReduceType reduce_type)
+template <ir::operation::Reduce::ReduceType reduce_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogistic(const Operator *op, ir::Graph &subg)
+template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseBinary(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Logistic(inputs, outputs));
+ ir::operation::ElementwiseBinary::Param param;
+ param.op_type = op_type;
+
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ElementwiseBinary(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadExp(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary(
+ const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Exp(inputs, outputs));
+ ir::operation::ElementwiseUnary::Param param;
+ param.op_type = op_type;
+
+ if (op_type == ir::operation::ElementwiseUnary::Type::CAST)
+ {
+ auto qasymm8ToUint8 = [](ir::Operand &operand) {
+ if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
+ {
+ operand.type(ir::DataType::UINT8);
+ }
+ };
+ qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::ElementwiseUnary::Input::INPUT)));
+ qasymm8ToUint8(subg.operands().at(outputs.at(0)));
+ }
+
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ElementwiseUnary(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadRank(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+ loadOperationIO(op, inputs, outputs);
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Rank(inputs, outputs));
+ subg.addOperation(std::move(new_op));
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
Einsum,
BroadcastTo,
FusedBatchNorm,
- StatelessRandomUniform
+ StatelessRandomUniform,
+ Erf
};
// Mapping from custom op name string to BuiltinOP enum
{"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
{"BroadcastTo", BuiltinOP::BroadcastTo},
{"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
+ {"Erf", BuiltinOP::Erf},
};
try
switch (custom_op_id)
{
case BuiltinOP::AddV2:
- loadAdd(op, subg);
+ loadAddV2(op, subg);
break;
case BuiltinOP::ReduceAll:
loadReduceAll(op, subg);
case BuiltinOP::StatelessRandomUniform:
loadStatelessRandomUniform(op, subg);
break;
+ case BuiltinOP::Erf:
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF);
+ break;
default:
throw std::runtime_error{
"Loader: Custom OP map is defined but operation loader function is not defined"};
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMinimum(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Min(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaximum(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Max(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCast(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- auto qasymm8ToUint8 = [](ir::Operand &operand) {
- if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
- {
- operand.type(ir::DataType::UINT8);
- }
- };
- qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::Cast::Input::INPUT)));
- qasymm8ToUint8(subg.operands().at(outputs.at(0)));
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Cast(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAbs(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Abs(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCos(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Cos(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSin(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Sin(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadShape(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadNeg(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Neg(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLog(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Log(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRound(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Round(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalNot(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalNot(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadZerosLike(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ZerosLike(inputs, outputs));
-
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadRange(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalOr(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalOr(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadQuantize(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadL2Normalization(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Quantize(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::L2Normalization(inputs, outputs));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadLeakyRelu(const Operator *op, ir::Graph &subg)
+{
+ float alpha = op->builtin_options_as_LeakyReluOptions()->alpha();
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LEAKY_RELU, alpha,
+ 1.f);
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg)
{
const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
loadConv2D(op, subg);
return;
case BuiltinOperator::BuiltinOperator_AVERAGE_POOL_2D:
- loadAvgPool2D(op, subg);
+ loadPool2D(op, subg, ir::operation::Pool2D::PoolType::AVG);
return;
case BuiltinOperator::BuiltinOperator_DEPTHWISE_CONV_2D:
loadDepthwiseConv2D(op, subg);
loadSoftmax(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MAX_POOL_2D:
- loadMaxPool2D(op, subg);
+ loadPool2D(op, subg, ir::operation::Pool2D::PoolType::MAX);
return;
case BuiltinOperator::BuiltinOperator_CONCATENATION:
loadConcatenation(op, subg);
loadFC(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ADD:
- loadAdd(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::ADD>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SUB:
- loadSub(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::SUB>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MUL:
- loadMul(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::MUL>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_DIV:
- loadDiv(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::DIV>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PACK:
loadPack(op, subg);
return;
case BuiltinOperator::BuiltinOperator_RELU:
- loadRelu(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU,
+ ir::operation::ElementwiseActivation::infinity, 0.f);
+ return;
+ case BuiltinOperator::BuiltinOperator_RELU_N1_TO_1:
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 1.f,
+ -1.f);
return;
case BuiltinOperator::BuiltinOperator_RELU6:
- loadRelu6(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 6.f,
+ 0.f);
return;
case BuiltinOperator::BuiltinOperator_RESIZE_BILINEAR:
loadResizeBilinear(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
+ loadResizeNearestNeighbor(op, subg);
+ return;
case BuiltinOperator::BuiltinOperator_RSQRT:
- loadRsqrt(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::RSQRT);
return;
case BuiltinOperator::BuiltinOperator_SELECT:
loadSelect(op, subg);
loadSelect(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SQRT:
- loadSqrt(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT);
return;
case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
loadSquaredDifference(op, subg);
return;
case BuiltinOperator::BuiltinOperator_TANH:
- loadTanh(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::TANH, 1.f,
+ 1.f);
return;
case BuiltinOperator::BuiltinOperator_TRANSPOSE:
loadTranspose(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MEAN:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::MEAN);
+ loadReduce<ir::operation::Reduce::ReduceType::MEAN>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_ANY:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::ANY);
+ loadReduce<ir::operation::Reduce::ReduceType::ANY>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_MAX:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::MAX);
+ loadReduce<ir::operation::Reduce::ReduceType::MAX>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REVERSE_V2:
loadReverseV2(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PAD:
+ case BuiltinOperator::BuiltinOperator_PADV2:
loadPad(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGISTIC:
- loadLogistic(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LOGISTIC);
return;
case BuiltinOperator::BuiltinOperator_EXP:
- loadExp(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::EXP);
return;
case BuiltinOperator::BuiltinOperator_EXPAND_DIMS:
loadExpandDims(op, subg);
loadBatchToSpaceND(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SUM:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::SUM);
+ loadReduce<ir::operation::Reduce::ReduceType::SUM>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_CUSTOM:
loadCustom(op, subg);
loadUnpack(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MINIMUM:
- loadMinimum(op, subg);
+ loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MAXIMUM:
- loadMaximum(op, subg);
+ loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_CAST:
- loadCast(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::CAST);
return;
case BuiltinOperator::BuiltinOperator_EQUAL:
case BuiltinOperator::BuiltinOperator_NOT_EQUAL:
loadOneHot(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ABS:
- loadAbs(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ABS);
return;
case BuiltinOperator::BuiltinOperator_COS:
- loadCos(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::COS);
return;
case BuiltinOperator::BuiltinOperator_SIN:
- loadSin(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SIN);
return;
case BuiltinOperator::BuiltinOperator_SHAPE:
loadShape(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_PROD:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::PROD);
+ loadReduce<ir::operation::Reduce::ReduceType::PROD>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_IF:
loadIf(op, subg);
loadWhile(op, subg);
return;
case BuiltinOperator::BuiltinOperator_NEG:
- loadNeg(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG);
return;
case BuiltinOperator::BuiltinOperator_ARG_MAX:
loadArgMax(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOG:
- loadLog(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG);
return;
case BuiltinOperator::BuiltinOperator_ROUND:
- loadRound(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ROUND);
return;
case BuiltinOperator::BuiltinOperator_POW:
loadPow(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGICAL_NOT:
- loadLogicalNot(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT);
return;
case BuiltinOperator::BuiltinOperator_LOGICAL_OR:
- loadLogicalOr(op, subg);
+ loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR>(
+ op, subg);
return;
case BuiltinOperator::BuiltinOperator_FILL:
loadFill(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ZEROS_LIKE:
- loadZerosLike(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ZEROS_LIKE);
return;
case BuiltinOperator::BuiltinOperator_TILE:
loadTile(op, subg);
loadLogSoftmax(op, subg);
return;
case BuiltinOperator::BuiltinOperator_QUANTIZE:
- loadQuantize(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::QUANTIZE);
return;
case BuiltinOperator::BuiltinOperator_SPACE_TO_DEPTH:
loadSpaceToDepth(op, subg);
return;
+    case BuiltinOperator::BuiltinOperator_L2_NORMALIZATION:
+      loadL2Normalization(op, subg);
+      return;
+ case BuiltinOperator::BuiltinOperator_LEAKY_RELU:
+ loadLeakyRelu(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_RANK:
+ loadRank(op, subg);
+ return;
default:
throw std::runtime_error(
std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
// Set inputs
for (const std::int32_t input_ind : *circle_subg->inputs())
{
- subg->addInput(tensorIdxToOperandIdx(input_ind));
+ subg->addInput(tensorIdxToOperandIdx(input_ind),
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *circle_subg->outputs())
{
- subg->addOutput(tensorIdxToOperandIdx(output_ind));
+ subg->addOutput(tensorIdxToOperandIdx(output_ind),
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *circle_subg->operators())
}
OperationFactory::Generator
+getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type,
+ float alpha = 0.f, float beta = 0.f)
+{
+ return [op_type, alpha, beta](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::ElementwiseActivation::Param param;
+ param.op_type = op_type;
+ param.alpha = alpha;
+ param.beta = beta;
+
+ return new operation::ElementwiseActivation{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator getElementwiseBinaryGenerator(
+ const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
+{
+ return [op_type](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lefthand side operand
+ // 1 -> Righthand side operand
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::ElementwiseBinary::Param param;
+ param.op_type = op_type;
+
+ return new operation::ElementwiseBinary{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
+getElementwiseUnaryGenerator(const onert::ir::operation::ElementwiseUnary::Type op_type)
+{
+ return [op_type](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 1);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::ElementwiseUnary::Param param;
+ param.op_type = op_type;
+
+ if (op_type == operation::ElementwiseUnary::Type::CAST)
+ {
+ // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's
+ // input/output
+ if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
+ {
+ replaceDataType(operands, inputs.at(0), DataType::UINT8);
+ }
+ if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
+ {
+ replaceDataType(operands, outputs.at(0), DataType::UINT8);
+ }
+ }
+
+ return new operation::ElementwiseUnary{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
+getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::ArithmeticType op_type)
+{
+ return [op_type](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+    // 0 -> Lefthand side operand
+    // 1 -> Righthand side operand
+    // 2 -> Activation Index
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = op_type;
+ const auto activation_index = OperandIndex{init_param.inputs[2]};
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+ return new operation::BinaryArithmetic{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
+getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type)
+{
+ return [pool_type](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.output_count == 1);
+
+ // In common
+ // 0 -> IFM Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::Pool2D::Param param;
+ param.op_type = pool_type;
+ if (init_param.input_count == 7) // support implicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+      // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+
+ const auto padding_index = OperandIndex{init_param.inputs[1]};
+ const auto hstride_index = OperandIndex{init_param.inputs[2]};
+ const auto vstride_index = OperandIndex{init_param.inputs[3]};
+ const auto kw_index = OperandIndex{init_param.inputs[4]};
+ const auto kh_index = OperandIndex{init_param.inputs[5]};
+ const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = operands.at(kh_index).asScalar<uint32_t>();
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else // support explicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+      // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+
+ const auto padding_left_index = OperandIndex{init_param.inputs[1]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[2]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
+ const auto hstride_index = OperandIndex{init_param.inputs[5]};
+ const auto vstride_index = OperandIndex{init_param.inputs[6]};
+ const auto kw_index = OperandIndex{init_param.inputs[7]};
+ const auto kh_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ return new operation::Pool2D{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
getReduceGenerator(const onert::ir::operation::Reduce::ReduceType reduce_type)
{
return [reduce_type](const OperationFactory::Param &init_param, Operands &operands) {
return new T{inputs, outputs};
}
-// A generator function for binary ops with no params
-template <typename T>
-Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands)
+OperationFactory::Generator getComparisonGenerator(operation::Comparison::ComparisonType type)
{
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
+ return [type](const OperationFactory::Param &init_param, Operands &) -> Operation * {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
- // In common
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
- typename T::Param param;
- if (init_param.input_count == 7) // support implicit padding
- {
// Each input should be interpreted as follows:
//
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
-
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = operands.at(kh_index).asScalar<uint32_t>();
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- return new T{inputs, outputs, param};
+ operation::Comparison::Param param;
+ param.comparison_type = type;
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
}
} // namespace
return new operation::DepthwiseConv2D{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_MAX_POOL_2D] = createPool2DOp<operation::MaxPool2D>;
+ _map[ANEURALNETWORKS_MAX_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::MAX);
- _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = createPool2DOp<operation::AvgPool2D>;
+ _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::AVG);
_map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::Softmax{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_CAST] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's input/output
- if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
- {
- replaceDataType(operands, inputs.at(0), DataType::UINT8);
- }
- if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
- {
- replaceDataType(operands, outputs.at(0), DataType::UINT8);
- }
-
- return new operation::Cast{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_CAST] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
// ANEURALNETWORKS_CAST_EX is deprecated
// TODO Remove ANEURALNETWORKS_CAST_EX
// inputCount is either 7 or 10 acccording to NN API specification.
// - Padding is implicit when inputCount is 7
// - Padding is explicit when inputCount is 10
- assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.input_count == 7 || init_param.input_count == 10 ||
+ init_param.input_count == 13);
assert(init_param.output_count == 1);
// 0 -> IFM Tensor Index
OperandIndexSequence outputs{init_param.outputs[0]};
Conv2D::Param param;
-
if (init_param.input_count == 7) // support implicit padding
{
// Each input should be interpreted as follows:
param.padding.type =
NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
+
+ param.dilation.width_factor = 1;
+ param.dilation.height_factor = 1;
+
param.activation =
NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
padding_top_index, padding_bottom_index);
param.stride = makeStride(operands, hstride_index, vstride_index);
+
+ param.dilation.width_factor = 1;
+ param.dilation.height_factor = 1;
+
param.activation =
NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
+ else if (init_param.input_count == 13) // support dilation
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 3 -> Padding_left Index
+ // 4 -> Padding_right Index
+ // 5 -> Padding_top Index
+ // 6 -> Padding_bottom Index
+ // 7 -> Stride (width) Index
+ // 8 -> Stride (height) Index
+ // 9 -> Activation Index
+ // 11 -> Dilation (width_factor) Index
+ // 12 -> Dilation (height_factor) Index
- return new Conv2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_ADD] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count == 1);
+ const auto padding_left_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[4]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[5]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[6]};
+ const auto hstride_index = OperandIndex{init_param.inputs[7]};
+ const auto vstride_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+ const auto width_factor_index = OperandIndex{init_param.inputs[11]};
+ const auto height_factor_index = OperandIndex{init_param.inputs[12]};
- // Each input should be interpreted as follows:
- //
- // 0 -> Lefthand side operand
- // 1 -> Righthand side operand
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ auto width_factor = operands.at(width_factor_index).asScalar<int32_t>();
+ auto height_factor = operands.at(height_factor_index).asScalar<int32_t>();
- operation::Add::Param param;
+ param.dilation.width_factor = width_factor;
+ param.dilation.height_factor = height_factor;
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else
+ {
+ throw std::runtime_error{"Conv2D: unsupported input operand count"};
+ }
- return new operation::Add{inputs, outputs, param};
+ return new Conv2D{inputs, outputs, param};
};
+ _map[ANEURALNETWORKS_ADD] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
+
_map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD];
_map[ANEURALNETWORKS_REDUCE_SUM] =
// TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
_map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
- _map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lefthand side operand
- // 1 -> Righthand side operand
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::Sub::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Sub{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_SUB] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
_map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
return new operation::Transpose{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_MUL] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
- // 2 -> Activation Index
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Mul::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Mul{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_MUL] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
_map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::Squeeze{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_TANH] = CreateSimpleUnaryOp<operation::Tanh>;
+ _map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
- _map[ANEURALNETWORKS_LOG] = CreateSimpleUnaryOp<operation::Log>;
+ _map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG);
- _map[ANEURALNETWORKS_LOGISTIC] = CreateSimpleUnaryOp<operation::Logistic>;
+ _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
- _map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
+ _map[ANEURALNETWORKS_DIV] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
- // 2 -> Activation Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Div::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Div{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_EXP] = CreateSimpleUnaryOp<operation::Exp>;
+ _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP);
// ANEURALNETWORKS_EXP_EX is deprecated
// TODO Remove ANEURALNETWORKS_EXP_EX
// 1 -> Axis Tensor Index
_map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>;
- _map[ANEURALNETWORKS_GREATER] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Greater;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_GREATER_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
-
- return new operation::Comparison{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_GREATER] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
+ _map[ANEURALNETWORKS_GREATER_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
+ _map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less);
+ _map[ANEURALNETWORKS_LESS_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
+ _map[ANEURALNETWORKS_NOT_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
+ _map[ANEURALNETWORKS_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
// ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LESS] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Less;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_LESS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::LessEqual;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
// ANEURALNETWORKS_LESS_EX is deprecated
// TODO Remove ANEURALNETWORKS_LESS_EX
_map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param,
// TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
_map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX];
- _map[ANEURALNETWORKS_NOT_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input1 Tensor Index
- // 1 -> input2 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
// ANEURALNETWORKS_NOT_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX
_map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param,
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_AND] = createSimpleBinaryOp<operation::LogicalAnd>;
+ _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator(
+ operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
// ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
replaceDataType(operands, inputs.at(1), DataType::BOOL8);
replaceDataType(operands, outputs.at(0), DataType::BOOL8);
- return new operation::LogicalAnd{inputs, outputs};
+ operation::ElementwiseBinary::Param param;
+ param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND;
+
+ return new operation::ElementwiseBinary{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_RSQRT] = CreateSimpleUnaryOp<operation::RSQRT>;
+ _map[ANEURALNETWORKS_RSQRT] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
_map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
// TODO Remove ANEURALNETWORKS_RSQRT_EX
_map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
- _map[ANEURALNETWORKS_RELU] = CreateSimpleUnaryOp<operation::ReLU>;
+ _map[ANEURALNETWORKS_RELU] =
+ getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
+ onert::ir::operation::ElementwiseActivation::infinity, 0);
_map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::ResizeBilinear{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_RELU1] = CreateSimpleUnaryOp<operation::ReLU1>;
+ _map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
- _map[ANEURALNETWORKS_RELU6] = CreateSimpleUnaryOp<operation::ReLU6>;
+ _map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
_map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
return new operation::RNN{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_FLOOR] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Floor{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_FLOOR] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
_map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param,
Operands &) {
return new operation::SpaceToDepth{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_L2_POOL_2D] = createPool2DOp<operation::L2Pool2D>;
+ _map[ANEURALNETWORKS_L2_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::L2);
_map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param,
Operands &) {
return new operation::TransposeConv{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_SQRT] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- return new operation::SQRT{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_SQRT] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
// ANEURALNETWORKS_SQRT_EX is deprecated
// TODO Remove ANEURALNETWORKS_SQRT_EX
_map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
- _map[ANEURALNETWORKS_LOGICAL_OR] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::LogicalOr{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator(
+ operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
// ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
replaceDataType(operands, inputs.at(1), DataType::BOOL8);
replaceDataType(operands, outputs.at(0), DataType::BOOL8);
- return new operation::LogicalOr{inputs, outputs};
+ operation::ElementwiseBinary::Param param;
+ param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR;
+
+ return new operation::ElementwiseBinary{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_NOT] = CreateSimpleUnaryOp<operation::LogicalNot>;
+ _map[ANEURALNETWORKS_LOGICAL_NOT] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
// ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
replaceDataType(operands, inputs.at(0), DataType::BOOL8);
replaceDataType(operands, outputs.at(0), DataType::BOOL8);
- return new operation::LogicalNot{inputs, outputs};
+ operation::ElementwiseUnary::Param param;
+ param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT;
+
+ return new operation::ElementwiseUnary{inputs, outputs, param};
};
_map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) {
return new operation::LSTM{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Equal;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
// ANEURALNETWORKS_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_EQUAL_EX
_map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param,
// TODO Remove ANEURALNETWORKS_GATHER_EX
_map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
- _map[ANEURALNETWORKS_NEG] = CreateSimpleUnaryOp<operation::Neg>;
+ _map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG);
// ANEURALNETWORKS_NEG_EX is deprecated
// TODO Remove ANEURALNETWORKS_NEG_EX
_map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
- _map[ANEURALNETWORKS_ABS] = CreateSimpleUnaryOp<operation::Abs>;
+ _map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS);
// ANEURALNETWORKS_ABS_EX is deprecated
// TODO Remove ANEURALNETWORKS_ABS_EX
operation::ArgMax::Param param;
param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+ // NNAPI ARGMAX output type is always int32
+ param.output_type = DataType::INT32;
return new operation::ArgMax{inputs, outputs, param};
};
// TODO Remove ANEURALNETWORKS_ARGMAX_EX
_map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
- _map[ANEURALNETWORKS_DEQUANTIZE] = CreateSimpleUnaryOp<operation::Dequantize>;
+ _map[ANEURALNETWORKS_DEQUANTIZE] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
_map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
_map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];
- _map[ANEURALNETWORKS_MINIMUM] = createSimpleBinaryOp<operation::Min>;
+ _map[ANEURALNETWORKS_MINIMUM] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
- _map[ANEURALNETWORKS_MAXIMUM] = createSimpleBinaryOp<operation::Max>;
+ _map[ANEURALNETWORKS_MAXIMUM] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
_map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::OneHot{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_COS_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Cos{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_COS_EX] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
- _map[ANEURALNETWORKS_SIN] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Sin{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN);
_map[ANEURALNETWORKS_SHAPE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 1 && init_param.output_count == 1);
_map[ANEURALNETWORKS_REDUCE_PROD] =
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
- _map[ANEURALNETWORKS_ROUND_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Round{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_ROUND_EX] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
_map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
// 1 -> A 1-D tensor, specifying the value
_map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;
- _map[ANEURALNETWORKS_ZEROS_LIKE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ZerosLike{inputs, outputs};
- };
-
+ _map[ANEURALNETWORKS_ZEROS_LIKE_EX] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
// Each input should be interpreted as follows:
// 0 -> Input Tensor Index
// 1 -> Multiple Tensor Index
return new operation::LogSoftmax{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_QUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Quantize{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_QUANTIZE] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
}
Operation *OperationFactory::create(ANeuralNetworksOperationType type,
// Set inputs
for (const std::int32_t input_ind : *tflite_subg->inputs())
{
- subg->addInput(tensorIdxToOperandIdx(input_ind));
+ subg->addInput(tensorIdxToOperandIdx(input_ind),
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *tflite_subg->outputs())
{
- subg->addOutput(tensorIdxToOperandIdx(output_ind));
+ subg->addOutput(tensorIdxToOperandIdx(output_ind),
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *tflite_subg->operators())
#include <ir/TypeInfo.h>
#include <ir/DataType.h>
-#include <ir/operation/Add.h>
-#include <ir/operation/Sub.h>
-#include <ir/operation/Mul.h>
+#include <ir/operation/BinaryArithmetic.h>
#include <ir/operation/FullyConnected.h>
#include <gtest/gtest.h>
template <typename NodeT, typename... Types>
OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
{
- typename NodeT::Param op_params{Activation::NONE};
- auto op = std::make_unique<NodeT>(std::forward<Types>(args)..., op_params);
+ auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
auto op_idx = graph->addOperation(std::move(op));
// For now in scheduler test all operations in tested graphs has same size (for simplicity)
assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
// Create sub node
auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx});
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
// Create mul node
auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx});
+ BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
graph->finishBuilding();
return graph;
auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
// Create mul1 node
auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx});
+ BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+ mul1_op_params);
// Create mul2 node
auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx});
+ BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+ mul2_op_params);
// Create fc1 node
auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx});
+ FullyConnected::Param fc1_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
// Create fc2 node
auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx});
+ FullyConnected::Param fc2_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
- // Create add2 node
+ // Create sub node
auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx});
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
graph->finishBuilding();
return graph;
#include "ir/Graph.h"
#include "compiler/Compiler.h"
#include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
namespace
{
.at(operand_rhs2)
.data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
// 2nd add operations (result2 <= result1 + rhs2)
- operation::Add::Param param1;
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param1.activation = Activation::NONE;
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
- graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
- operation::Add::Param param2;
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param2.activation = Activation::NONE;
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
- graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
graph->addInput(operand_lhs);
graph->addInput(operand_rhs1);
#include "ir/Graph.h"
#include "interp/InterpExecutor.h"
#include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
namespace
{
// Add operations
- operation::Add::Param param;
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param.activation = Activation::NONE;
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
// 2nd add operations (result2 <= result1 + rhs2)
- operation::Add::Param param1;
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param1.activation = Activation::NONE;
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
- _graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
- operation::Add::Param param2;
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param2.activation = Activation::NONE;
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
- _graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
// Add operations
- operation::Add::Param param;
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param.activation = Activation::NONE;
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
Stride stride{3, 7};
Padding padding{PaddingType::SAME};
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
Stride stride{3, 7};
Padding padding{PaddingType::VALID};
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
Stride stride{3, 7};
Padding padding{4, 3, 2, 1};
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
Shape in_shape{10, 6, 12, 20};
Shape ker_shape{30, 3, 6, 20};
- operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE};
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE};
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+ Dilation{1, 1}};
infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE};
+ param =
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
GeneratedTests.conv_quant8_large_weights_as_inputs
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
GeneratedTests.conv_quant8_large_weights_as_inputs
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
GeneratedTests.conv_quant8_large_weights_as_inputs
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
GeneratedTests.conv_quant8_large_weights_as_inputs
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
GeneratedTests.conv_quant8_overflow
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_and_1D_nnfw
GeneratedTests.logical_and_2D_nnfw
GeneratedTests.logical_and_3D_nnfw
--- /dev/null
+#
+# Copyright (C) 2018 The Android Open Source Project
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+layout = BoolScalar("layout", False) # NHWC
+
+# TEST 1: dilation set to 1 (default)
+i1 = Input("op1", "TENSOR_FLOAT32", "{1, 3, 3, 1}")
+f1 = Parameter("op2", "TENSOR_FLOAT32", "{1, 2, 2, 1}", [.25, .25, .25, .25])
+b1 = Parameter("op3", "TENSOR_FLOAT32", "{1}", [0])
+o1 = Output("op4", "TENSOR_FLOAT32", "{1, 2, 2, 1}")
+Model().Operation("CONV_2D", i1, f1, b1, 0, 0, 0, 0, 1, 1, 0, layout, 1, 1).To(o1)
+
+# Additional data type
+quant8 = DataTypeConverter().Identify({
+ i1: ("TENSOR_QUANT8_ASYMM", 0.5, 0),
+ f1: ("TENSOR_QUANT8_ASYMM", 0.125, 0),
+ b1: ("TENSOR_INT32", 0.0625, 0),
+ o1: ("TENSOR_QUANT8_ASYMM", 0.125, 0)
+})
+
+# Instantiate an example
+example = Example({
+ i1: [1.0, 1.0, 1.0, 1.0, 0.5, 1.0, 1.0, 1.0, 1.0],
+ o1: [.875, .875, .875, .875]
+}).AddInput(f1, b1).AddVariations("relaxed", quant8, "float16")
+
+
+# TEST 2: dilation set to 3
+i2 = Input("op1", "TENSOR_FLOAT32", "{1, 9, 9, 1}")
+f2 = Parameter("op2", "TENSOR_FLOAT32", "{1, 3, 3, 1}", [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
+b2 = Parameter("op3", "TENSOR_FLOAT32", "{1}", [0])
+o2 = Output("op4", "TENSOR_FLOAT32", "{1, 3, 3, 1}")
+Model().Operation("CONV_2D", i2, f2, b2, 0, 0, 0, 0, 1, 1, 0, layout, 3, 3).To(o2)
+
+# Additional data type
+quant8 = DataTypeConverter().Identify({
+ i2: ("TENSOR_QUANT8_ASYMM", 0.5, 0),
+ f2: ("TENSOR_QUANT8_ASYMM", 0.125, 0),
+ b2: ("TENSOR_INT32", 0.0625, 0),
+ o2: ("TENSOR_QUANT8_ASYMM", 0.125, 0)
+})
+
+# Instantiate an example
+example = Example({
+ i2: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+ o2: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
+}).AddInput(f2, b2).AddVariations("relaxed", quant8, "float16")
output_data=[-4.14297, -10.14297, -2.14297, -.142971,
-7.00104, -12.00104, -.00104087, -9.00104],
)
+
+def quant8_test(input0, output0, input_data, beta, axis, output_data):
+ model = Model().Operation("LOG_SOFTMAX", input0, beta, axis).To(output0)
+ quant8 = DataTypeConverter().Identify({
+ input0: ["TENSOR_QUANT8_ASYMM", 10 / 255.0],
+ output0: ["TENSOR_QUANT8_ASYMM", 16 / 256.0, 255],
+ })
+
+ Example({
+ input0: input_data,
+ output0: output_data,
+ }, model=model).AddVariations(quant8)
+
+quant8_test(
+ input0=Input("input0", "TENSOR_FLOAT32", "{1, 1, 2, 4}"),
+ output0=Output("output0", "TENSOR_FLOAT32", "{1, 1, 2, 4}"),
+ input_data=[0, 6, 2, 4,
+ 3, 2, 10, 1],
+ beta=1.0,
+ axis=3,
+ output_data=[-6.145078, -.145078, -4.145078, -2.145078,
+ -7.001370, -8.001370, -.001370, -9.001370],
+)
add_executable(${RUNTIME_NNFW_API_TEST} ${RUNTIME_NNFW_API_TEST_SRC})
+nnfw_find_package(ARMCompute QUIET)
+if(ARMCompute_FOUND)
+ target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_ACL_BACKEND)
+endif(ARMCompute_FOUND)
+
set(RUNTIME_NNFW_API_TEST_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/src)
target_include_directories(${RUNTIME_NNFW_API_TEST} PRIVATE ${RUNTIME_NNFW_API_TEST_INCLUDE})
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleGen.h"
+
+CircleGen::CircleGen() : _subgraph_contexts(1) // Create primary subgraph
+{
+ // 0th buffer is always the empty buffer for non-const tensors
+ addBuffer(nullptr, 0);
+}
+
+template <typename T> uint32_t addBuffer(const std::vector<T> &buf_vec)
+{
+ auto buf = reinterpret_cast<const uint8_t *>(buf_vec.data());
+ auto size = buf_vec.size() * sizeof(T);
+ return addBuffer(buf, size);
+}
+
+uint32_t CircleGen::addBuffer(const uint8_t *buf, size_t size)
+{
+ uint32_t ind = _buffers.size();
+ _buffers.emplace_back(buildBuffer(buf, size));
+ return ind;
+}
+
+uint32_t CircleGen::addTensor(const TensorParams ¶ms)
+{
+ int ind = curSubgCtx().tensors.size();
+ curSubgCtx().tensors.emplace_back(buildTensor(params));
+ return ind;
+}
+
+void CircleGen::setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs)
+{
+ curSubgCtx().inputs = inputs;
+ curSubgCtx().outputs = outputs;
+}
+
+uint32_t CircleGen::nextSubgraph()
+{
+ uint32_t ind = _subgraph_contexts.size();
+ _subgraph_contexts.push_back({});
+ return ind;
+}
+
+CircleBuffer CircleGen::finish()
+{
+ std::vector<flatbuffers::Offset<circle::SubGraph>> subgraphs;
+ for (auto &ctx : _subgraph_contexts)
+ subgraphs.push_back(buildSubGraph(ctx));
+ auto model =
+ circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
+ _fbb.Finish(model);
+ return CircleBuffer{std::move(_fbb)};
+}
+
+// ===== Add Operator methods begin =====
+
+uint32_t CircleGen::addOperatorAdd(const OperatorParams ¶ms,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateAddOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ADD,
+ circle::BuiltinOptions_AddOptions, options);
+}
+
+uint32_t CircleGen::addOperatorAveragePool2D(const OperatorParams ¶ms, circle::Padding padding,
+ int stride_w, int stride_h, int filter_w, int filter_h,
+ circle::ActivationFunctionType actfn)
+{
+ auto options =
+ circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
+ .Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
+ circle::BuiltinOptions_Pool2DOptions, options);
+}
+
+uint32_t CircleGen::addOperatorConcatenation(const OperatorParams ¶ms, int axis,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateConcatenationOptions(_fbb, axis, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_CONCATENATION,
+ circle::BuiltinOptions_ConcatenationOptions, options);
+}
+
+uint32_t CircleGen::addOperatorCos(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateCosOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_COS,
+ circle::BuiltinOptions_CosOptions, options);
+}
+
+uint32_t CircleGen::addOperatorL2Normalization(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateL2NormOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_L2_NORMALIZATION,
+ circle::BuiltinOptions_L2NormOptions, options);
+}
+
+uint32_t CircleGen::addOperatorLess(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateLessOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_LESS,
+ circle::BuiltinOptions_LessOptions, options);
+}
+
+uint32_t CircleGen::addOperatorLeakyRelu(const OperatorParams ¶ms, float alpha)
+{
+ auto options = circle::CreateLeakyReluOptions(_fbb, alpha).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_LEAKY_RELU,
+ circle::BuiltinOptions_LeakyReluOptions, options);
+}
+
+uint32_t CircleGen::addOperatorNeg(const OperatorParams ¶ms)
+{
+ auto options = circle::CreatePadOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_NEG,
+ circle::BuiltinOptions_NegOptions, options);
+}
+
+uint32_t CircleGen::addOperatorPad(const OperatorParams ¶ms)
+{
+ auto options = circle::CreatePadOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_PAD,
+ circle::BuiltinOptions_PadOptions, options);
+}
+
+uint32_t CircleGen::addOperatorPadV2(const OperatorParams ¶ms)
+{
+ auto options = circle::CreatePadOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_PADV2,
+ circle::BuiltinOptions_PadV2Options, options);
+}
+
+uint32_t CircleGen::addOperatorRank(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateRankOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_RANK,
+ circle::BuiltinOptions_RankOptions, options);
+}
+
+uint32_t CircleGen::addOperatorResizeNearestNeighbor(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateResizeNearestNeighborOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+ circle::BuiltinOptions_ResizeNearestNeighborOptions, options);
+}
+
+uint32_t CircleGen::addOperatorWhile(const OperatorParams ¶ms, uint32_t cond_subg,
+ uint32_t body_subg)
+{
+ auto options = circle::CreateWhileOptions(_fbb, cond_subg, body_subg).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_WHILE,
+ circle::BuiltinOptions_WhileOptions, options);
+}
+
+// NOTE Please add addOperator functions ABOVE this line
+//
+// % How to add a new addOperatorXXX function
+// 0. Copy code from one of the existing addOperatorXXX function
+// 1. Change the function signature (need BuiltinOperator params)
+// 2. Change enum BuiltinOperator
+// 3. Change enum BuiltinOptions
+// 4. Change CreateXXXOptions accordingly
+
+// ===== Add Operator methods end =====
+
+uint32_t CircleGen::addOperatorWithOptions(const OperatorParams ¶ms,
+ circle::BuiltinOperator opcode,
+ circle::BuiltinOptions options_type,
+ flatbuffers::Offset<void> options)
+{
+ uint32_t opcode_ind = addOperatorCode(opcode);
+ auto op = circle::CreateOperatorDirect(_fbb, opcode_ind, ¶ms.inputs, ¶ms.outputs,
+ options_type, options);
+
+ uint32_t ind = curSubgCtx().operators.size();
+ curSubgCtx().operators.emplace_back(op);
+ return ind;
+}
+
+uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode)
+{
+ // TODO If the same OperatorCode is registered already, just return it
+ uint32_t ind = _opcodes.size();
+ _opcodes.emplace_back(circle::CreateOperatorCode(_fbb, opcode));
+ return ind;
+}
+
+flatbuffers::Offset<circle::Buffer> CircleGen::buildBuffer(const uint8_t *buf, size_t size)
+{
+ if (buf == nullptr && size == 0)
+ return circle::CreateBuffer(_fbb);
+ auto buffer = _fbb.CreateVector(buf, size);
+ return circle::CreateBuffer(_fbb, buffer);
+}
+
+flatbuffers::Offset<circle::Tensor> CircleGen::buildTensor(const TensorParams ¶ms)
+{
+ auto shape = _fbb.CreateVector(params.shape);
+ auto name = _fbb.CreateString(params.name);
+ return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name,
+ 0 /* QuantParam */, false /* is_variable */, 0 /* sparsity */,
+ 0 /* shape_signature */);
+}
+
+flatbuffers::Offset<circle::SubGraph> CircleGen::buildSubGraph(const SubgraphContext &ctx)
+{
+ return circle::CreateSubGraphDirect(_fbb, &ctx.tensors, &ctx.inputs, &ctx.outputs, &ctx.operators,
+ nullptr);
+}
_fbb.Finished(); // The build must have been finished, so check that here
}
- uint8_t *buffer() { return _fbb.GetBufferPointer(); }
- size_t size() { return _fbb.GetSize(); }
+ uint8_t *buffer() const { return _fbb.GetBufferPointer(); }
+ size_t size() const { return _fbb.GetSize(); }
private:
flatbuffers::FlatBufferBuilder _fbb;
int version = 1;
};
-public:
- CircleGen()
+ struct SubgraphContext
{
- // 0th buffer is always the empty buffer for non-const tensors
- addBuffer(nullptr, 0);
- }
+ std::vector<int> inputs;
+ std::vector<int> outputs;
+ std::vector<flatbuffers::Offset<circle::Tensor>> tensors;
+ std::vector<flatbuffers::Offset<circle::Operator>> operators;
+ };
+
+public:
+ CircleGen();
template <typename T> uint32_t addBuffer(const std::vector<T> &buf_vec)
{
auto size = buf_vec.size() * sizeof(T);
return addBuffer(buf, size);
}
-
- uint32_t addBuffer(const uint8_t *buf, size_t size)
- {
- uint32_t ind = _buffers.size();
- _buffers.emplace_back(buildBuffer(buf, size));
- return ind;
- }
-
- uint32_t addTensor(const TensorParams ¶ms)
- {
- int ind = _tensors.size();
- _tensors.emplace_back(buildTensor(params));
- return ind;
- }
-
- uint32_t setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs)
- {
- _inputs = inputs;
- _outputs = outputs;
- }
-
- CircleBuffer finish()
- {
- // TODO Support multiple subgraphs, for now only single subgraph model is supported.
- std::vector<flatbuffers::Offset<circle::SubGraph>> subgraphs{buildSubGraph()};
- auto model =
- circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
- _fbb.Finish(model);
- return CircleBuffer{std::move(_fbb)};
- }
+ uint32_t addBuffer(const uint8_t *buf, size_t size);
+ uint32_t addTensor(const TensorParams ¶ms);
+ void setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs);
+ uint32_t nextSubgraph();
+ CircleBuffer finish();
// ===== Add Operator methods begin =====
- uint32_t addOperatorAdd(const OperatorParams ¶ms, circle::ActivationFunctionType actfn)
- {
- auto options = circle::CreateAddOptions(_fbb, actfn).Union();
- return addOperatorWithOptions(params, circle::BuiltinOperator_ADD,
- circle::BuiltinOptions_AddOptions, options);
- }
-
+ uint32_t addOperatorAdd(const OperatorParams ¶ms, circle::ActivationFunctionType actfn);
uint32_t addOperatorAveragePool2D(const OperatorParams ¶ms, circle::Padding padding,
int stride_w, int stride_h, int filter_w, int filter_h,
- circle::ActivationFunctionType actfn)
- {
- auto options =
- circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
- .Union();
- return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
- circle::BuiltinOptions_Pool2DOptions, options);
- }
+ circle::ActivationFunctionType actfn);
+ uint32_t addOperatorConcatenation(const OperatorParams ¶ms, int axis,
+ circle::ActivationFunctionType actfn);
+ uint32_t addOperatorCos(const OperatorParams ¶ms);
+ uint32_t addOperatorL2Normalization(const OperatorParams ¶ms);
+ uint32_t addOperatorLeakyRelu(const OperatorParams ¶ms, float alpha);
+ uint32_t addOperatorLess(const OperatorParams ¶ms);
+ uint32_t addOperatorNeg(const OperatorParams ¶ms);
+ uint32_t addOperatorPad(const OperatorParams ¶ms);
+ uint32_t addOperatorPadV2(const OperatorParams ¶ms);
+ uint32_t addOperatorRank(const OperatorParams ¶ms);
+ uint32_t addOperatorResizeNearestNeighbor(const OperatorParams ¶ms);
+ uint32_t addOperatorWhile(const OperatorParams ¶ms, uint32_t cond_subg, uint32_t body_subg);
// NOTE Please add addOperator functions ABOVE this line
- //
- // % How to add a new addOperatorXXX fuction
- // 0. Copy code from one of the existing addOperatorXXX function
- // 1. Change the function signature (need BuiltinOperator params)
- // 2. Change enum BuiltinOperator
- // 3. Change enum BuiltinOptions
- // 4. Change CreateXXXOptions accordingly
-
// ===== Add Operator methods end =====
private:
uint32_t addOperatorWithOptions(const OperatorParams &params, circle::BuiltinOperator opcode,
circle::BuiltinOptions options_type,
- flatbuffers::Offset<void> options)
- {
- uint32_t opcode_ind = addOperatorCode(opcode);
- auto op = circle::CreateOperatorDirect(_fbb, opcode_ind, &params.inputs, &params.outputs,
- options_type, options);
+ flatbuffers::Offset<void> options);
+ uint32_t addOperatorCode(circle::BuiltinOperator opcode);
+ flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size);
+ flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params);
+ flatbuffers::Offset<circle::SubGraph> buildSubGraph(const SubgraphContext &ctx);
- uint32_t ind = _operators.size();
- _operators.emplace_back(op);
- return ind;
- }
-
- uint32_t addOperatorCode(circle::BuiltinOperator opcode)
- {
- // TODO If the same OperatorCode is registered already, just return it
- uint32_t ind = _opcodes.size();
- _opcodes.emplace_back(circle::CreateOperatorCode(_fbb, opcode));
- return ind;
- }
-
- flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size)
- {
- if (buf == nullptr && size == 0)
- return circle::CreateBuffer(_fbb);
- auto buffer = _fbb.CreateVector(buf, size);
- return circle::CreateBuffer(_fbb, buffer);
- }
-
- flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params)
- {
- auto shape = _fbb.CreateVector(params.shape);
- auto name = _fbb.CreateString(params.name);
- return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name,
- 0 /* QuantParam */, false /* is_variable */, 0 /* sparsity */,
- 0 /* shape_signature */);
- }
-
- flatbuffers::Offset<circle::SubGraph> buildSubGraph()
- {
- return circle::CreateSubGraphDirect(_fbb, &_tensors, &_inputs, &_outputs, &_operators, nullptr);
- }
+ SubgraphContext &curSubgCtx() { return _subgraph_contexts.back(); }
private:
flatbuffers::FlatBufferBuilder _fbb{1024};
std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes;
-
- // per-subgraph
- std::vector<int> _inputs;
- std::vector<int> _outputs;
- std::vector<flatbuffers::Offset<circle::Tensor>> _tensors;
- std::vector<flatbuffers::Offset<circle::Operator>> _operators;
+ std::vector<SubgraphContext> _subgraph_contexts;
};
#endif // __NNFW_API_TEST_CIRCLE_GEN_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <nnfw_internal.h>
+
+#include <fstream>
+#include <string>
+
+#include "CircleGen.h"
+#include "fixtures.h"
+
+// One inference case: input payloads and the expected output values,
+// index-aligned with the model's inputs/outputs.
+struct TestCaseData
+{
+ /**
+ * @brief A vector of input buffers
+ *
+ * @todo support other types as well as float
+ */
+ std::vector<std::vector<float>> inputs;
+ /**
+ * @brief A vector of output buffers
+ *
+ * @todo support other types as well as float
+ */
+ std::vector<std::vector<float>> outputs;
+};
+
+// Holds everything a GenModelTest needs: the generated circle model,
+// the test cases to run against it, and the backends to run them on.
+class GenModelTestContext
+{
+public:
+ // Takes ownership of the generated circle model; "cpu" is the default backend.
+ GenModelTestContext(CircleBuffer &&cbuf) : _cbuf{std::move(cbuf)}, _backends{"cpu"} {}
+
+ /**
+ * @brief Return circle buffer
+ *
+ * @return CircleBuffer& the circle buffer
+ */
+ const CircleBuffer &cbuf() const { return _cbuf; }
+
+ /**
+ * @brief Return test cases
+ *
+ * @return std::vector<TestCaseData>& the test cases
+ */
+ const std::vector<TestCaseData> &test_cases() const { return _test_cases; }
+
+ /**
+ * @brief Return backends
+ *
+ * @return const std::vector<std::string>& the backends to be tested
+ */
+ const std::vector<std::string> &backends() const { return _backends; }
+
+ /**
+ * @brief Return whether this test expects compilation (nnfw_prepare) to fail
+ *
+ * @return bool true if the test is defined to fail on compile
+ */
+ // NOTE(review): the leading `const` on a by-value bool return is redundant.
+ const bool fail_compile() const { return _fail_compile; }
+
+ /**
+ * @brief Add a test case
+ *
+ * @param tc the test case to be added
+ */
+ void addTestCase(const TestCaseData &tc) { _test_cases.emplace_back(tc); }
+
+ /**
+ * @brief Set the backends to be tested
+ *
+ * Requested backends are filtered: "cpu" is always accepted, while
+ * "acl_cl"/"acl_neon" are kept only when built with TEST_ACL_BACKEND.
+ * Any other name is silently dropped.
+ *
+ * @param backends the backends to be tested
+ */
+ void setBackends(const std::vector<std::string> &backends)
+ {
+ _backends.clear();
+
+ for (auto backend : backends)
+ {
+#ifdef TEST_ACL_BACKEND
+ if (backend == "acl_cl" || backend == "acl_neon")
+ {
+ _backends.push_back(backend);
+ }
+#endif
+ if (backend == "cpu")
+ {
+ _backends.push_back(backend);
+ }
+ }
+ }
+
+ /**
+ * @brief Mark that compilation is expected to fail for this model
+ */
+ void setCompileFail() { _fail_compile = true; }
+
+private:
+ CircleBuffer _cbuf;
+ std::vector<TestCaseData> _test_cases;
+ std::vector<std::string> _backends;
+ bool _fail_compile{false};
+};
+
+/**
+ * @brief Generated Model test fixture for a one time inference
+ *
+ * This fixture is for one-time inference test with variety of generated models.
+ * It is the test maker's responsibility to create @c _context which contains
+ * test body, which are generated circle buffer, model input data and output data and
+ * backend list to be tested.
+ * The rest(calling API functions for execution) is done by @c Setup and @c TearDown .
+ *
+ */
+class GenModelTest : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ { // DO NOTHING
+ }
+
+ // Executes the actual test: for each requested backend, create a session,
+ // load the model, compile, wire up I/O buffers, run every test case, and
+ // compare outputs against the expected values.
+ void TearDown() override
+ {
+ for (std::string backend : _context->backends())
+ {
+ // NOTE If we can prepare many times for one model loading on same session,
+ // we can move nnfw_create_session to SetUp and
+ // nnfw_load_circle_from_buffer to outside forloop
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session));
+ auto &cbuf = _context->cbuf();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_so.session, cbuf.buffer(), cbuf.size()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_so.session, backend.data()));
+
+ if (_context->fail_compile())
+ {
+ // Negative test: compilation itself is expected to fail for this model.
+ ASSERT_EQ(nnfw_prepare(_so.session), NNFW_STATUS_ERROR);
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
+ continue;
+ }
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_so.session));
+
+ // In/Out buffer settings
+ // NOTE(review): buffer sizes assume float tensors, matching the
+ // float-only TestCaseData — revisit when other dtypes are supported.
+ uint32_t num_inputs;
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_so.session, &num_inputs));
+ _so.inputs.resize(num_inputs);
+ for (uint32_t ind = 0; ind < _so.inputs.size(); ind++)
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_so.session, ind, &ti));
+ uint64_t input_elements = num_elems(&ti);
+ _so.inputs[ind].resize(input_elements);
+
+ ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
+ sizeof(float) * input_elements),
+ NNFW_STATUS_NO_ERROR);
+ }
+
+ uint32_t num_outputs;
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_so.session, &num_outputs));
+ _so.outputs.resize(num_outputs);
+ for (uint32_t ind = 0; ind < _so.outputs.size(); ind++)
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, ind, &ti));
+ uint64_t output_elements = num_elems(&ti);
+ _so.outputs[ind].resize(output_elements);
+ ASSERT_EQ(nnfw_set_output(_so.session, ind, ti.dtype, _so.outputs[ind].data(),
+ sizeof(float) * output_elements),
+ NNFW_STATUS_NO_ERROR);
+ }
+
+ // Set input values, run, and check output values
+ for (auto &test_case : _context->test_cases())
+ {
+ auto &ref_inputs = test_case.inputs;
+ auto &ref_outputs = test_case.outputs;
+ ASSERT_EQ(_so.inputs.size(), ref_inputs.size());
+ for (uint32_t i = 0; i < _so.inputs.size(); i++)
+ {
+ // Fill the values
+ ASSERT_EQ(_so.inputs[i].size(), ref_inputs[i].size());
+ memcpy(_so.inputs[i].data(), ref_inputs[i].data(), _so.inputs[i].size() * sizeof(float));
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_run(_so.session));
+
+ ASSERT_EQ(_so.outputs.size(), ref_outputs.size());
+ for (uint32_t i = 0; i < _so.outputs.size(); i++)
+ {
+ // Check output tensor values
+ auto &ref_output = ref_outputs[i];
+ auto &output = _so.outputs[i];
+ ASSERT_EQ(output.size(), ref_output.size());
+ for (uint32_t e = 0; e < ref_output.size(); e++)
+ EXPECT_NEAR(ref_output[e], output[e], 0.001); // TODO better way for handling FP error?
+ }
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
+ }
+ }
+
+protected:
+ SessionObject _so;
+ // Test body; each test must construct this (model, cases, backends)
+ // before TearDown runs the inference loop above.
+ std::unique_ptr<GenModelTestContext> _context;
+};
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <nnfw_internal.h>
-
-#include <fstream>
-
-#include "CircleGen.h"
-#include "fixtures.h"
-
-/**
- * @brief Generated Model test fixture for a one time inference
- *
- * This fixture is for one-time inference test with variety of generated models.
- * It is the user's responsiblity to create @c _cbuf , @c _ref_inputs and @c _ref_outputs in the
- * test body, which are generated circle buffer, model input data and output data respectively.
- * The rest(calling API functions for execution) is done by @c Setup and @c TearDown .
- *
- */
-class GenModelTest : public ::testing::Test
-{
-protected:
- void SetUp() override { NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session)); }
-
- void TearDown() override
- {
- NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_so.session, _cbuf.buffer(), _cbuf.size()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_so.session));
-
- // In/Out buffer settings
- {
- uint32_t num_inputs;
- NNFW_ENSURE_SUCCESS(nnfw_input_size(_so.session, &num_inputs));
- _so.inputs.resize(num_inputs);
- for (uint32_t ind = 0; ind < _so.inputs.size(); ind++)
- {
- nnfw_tensorinfo ti;
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_so.session, ind, &ti));
- uint64_t input_elements = num_elems(&ti);
- _so.inputs[ind].resize(input_elements);
-
- ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
- sizeof(float) * input_elements),
- NNFW_STATUS_NO_ERROR);
- }
-
- uint32_t num_outputs;
- NNFW_ENSURE_SUCCESS(nnfw_output_size(_so.session, &num_outputs));
- _so.outputs.resize(num_outputs);
- for (uint32_t ind = 0; ind < _so.outputs.size(); ind++)
- {
- nnfw_tensorinfo ti;
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, ind, &ti));
- uint64_t output_elements = num_elems(&ti);
- _so.outputs[ind].resize(output_elements);
- ASSERT_EQ(nnfw_set_output(_so.session, ind, ti.dtype, _so.outputs[ind].data(),
- sizeof(float) * output_elements),
- NNFW_STATUS_NO_ERROR);
- }
- }
-
- // Set input values, run, and check output values
- {
- ASSERT_EQ(_so.inputs.size(), _ref_inputs.size());
- for (uint32_t i = 0; i < _so.inputs.size(); i++)
- {
- // Fill the values
- ASSERT_EQ(_so.inputs[i].size(), _ref_inputs[i].size());
- memcpy(_so.inputs[i].data(), _ref_inputs[i].data(), _so.inputs[i].size() * sizeof(float));
- }
-
- NNFW_ENSURE_SUCCESS(nnfw_run(_so.session));
-
- ASSERT_EQ(_so.outputs.size(), _ref_outputs.size());
- for (uint32_t i = 0; i < _so.outputs.size(); i++)
- {
- // Check output tensor values
- auto &ref_output = _ref_outputs[i];
- auto &output = _so.outputs[i];
- ASSERT_EQ(output.size(), ref_output.size());
- for (uint32_t e = 0; e < ref_output.size(); e++)
- ASSERT_FLOAT_EQ(ref_output[e], output[e]);
- }
- }
-
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
- }
-
-protected:
- SessionObject _so;
- CircleBuffer _cbuf;
- std::vector<std::vector<float>> _ref_inputs;
- std::vector<std::vector<float>> _ref_outputs;
-};
-
-TEST_F(GenModelTest, OneOp_Add_VarToConst)
-{
- CircleGen cgen;
- std::vector<float> rhs_data{5, 4, 7, 4};
- uint32_t rhs_buf = cgen.addBuffer(rhs_data);
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs}, {out});
- _cbuf = cgen.finish();
-
- _ref_inputs = {{1, 3, 2, 4}};
- _ref_outputs = {{6, 7, 9, 8}};
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVar)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
- _cbuf = cgen.finish();
-
- _ref_inputs = {{1, 3, 2, 4}, {5, 4, 7, 4}};
- _ref_outputs = {{6, 7, 9, 8}};
-}
-
-TEST_F(GenModelTest, OneOp_AvgPool2D)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- _cbuf = cgen.finish();
-
- _ref_inputs = {{1, 3, 2, 4}};
- _ref_outputs = {{2.5}};
-}
#include "common.h"
#include "fixtures.h"
#include "NNPackages.h"
+#include "CircleGen.h"
void set_input_output(nnfw_session *session, const std::vector<float> &input,
std::vector<float> &actual_output)
*
* @note Run this test with "cpu" backend
*/
+// TODO Rewrite this with CircleGen
class TestDynamicTensorReshapeModelLoaded
: public ValidationTestModelLoaded<NNPackages::DYNAMIC_TENSOR_RESHAPE>
{
// Trying to set unknown dim to other value before calling nnfw_prepare()
//
-class TestInputUnknownDimInputConcatModelLoaded
- : public ValidationTestModelLoaded<NNPackages::UNKNOWN_DIM_INPUT_CONCAT>
-{
-protected:
- void prepare_apply_set_input_output(const std::vector<float> &input0,
- const std::vector<float> &input1,
- std::vector<float> *actual_output, nnfw_tensorinfo input0_ti)
- {
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
-
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input0.data(),
- sizeof(float) * input0.size()),
- NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_input(_session, 1, NNFW_TYPE_TENSOR_FLOAT32, input1.data(),
- sizeof(float) * input1.size()),
- NNFW_STATUS_NO_ERROR);
-
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output->data(),
- sizeof(float) * actual_output->size()),
- NNFW_STATUS_NO_ERROR);
- }
-};
-
/**
* @brief Testing the following model:
*
*
* @note Run this test with "cpu" backend
*/
-TEST_F(TestInputUnknownDimInputConcatModelLoaded, concat_input0_to_2x3)
+auto build_model_buf_Concatenation_unknwon_dims()
{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ // Model is not important
+ CircleGen cgen;
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+ int in1 = cgen.addTensor({{1, 1}, f32}); // consider this [None, None]
+ int in2 = cgen.addTensor({{2, 3}, f32});
+ int out = cgen.addTensor({{}, f32}); // scalar, meaning output shape is unspecified
+ cgen.addOperatorConcatenation({{in1, in2}, {out}}, 0, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in1, in2}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
+
+TEST(TestDynamicTensor, concat_unknown_dim_input0_to_2x3)
+{
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_Concatenation_unknwon_dims();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
const std::vector<float> input0 = {1, 2, 3}; // of shape [1, 3]
const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
// input reshaping to [1, 3]
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 3}};
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
- set_input_output(_session, input0, input1, actual_output);
+ set_input_output(session, input0, input1, actual_output);
// Do inference
- NNFW_STATUS res = nnfw_run(_session);
+ NNFW_STATUS res = nnfw_run(session);
NNFW_ENSURE_SUCCESS(res);
// output value check
*
* @note Run this test with "cpu" backend and "linear" executor
*/
-TEST_F(TestInputUnknownDimInputConcatModelLoaded, neg_concat_input0_to_wrong_shape)
+TEST(TestDynamicTensor, neg_concat_input0_to_wrong_shape)
{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_Concatenation_unknwon_dims();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
const std::vector<float> input0 = {1, 2, 3}; // of shape [3, 1], wrong shape
const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
// input reshaping to [3, 1]
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {3, 1}};
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
}
//
*
* @note Run this test with "cpu" backend
*/
-using TestDynamicTensorApplyTensorInfoBinaryOp =
- ValidationTestModelLoaded<NNPackages::ADD_UNSPECIFIED_RANK_INPUTS>;
+auto build_model_buf_Add_unspecified_rank()
+{
+ // Model is not important
+ CircleGen cgen;
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+ int in1 = cgen.addTensor({{}, f32}); // scalar, meaning shape is unspecified
+ int in2 = cgen.addTensor({{1, 2, 3}, f32});
+ int op_out = cgen.addTensor({{}, f32}); // unspecified
+ int out = cgen.addTensor({{}, f32}); // unspecified
+ cgen.addOperatorAdd({{in1, in2}, {op_out}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{op_out, op_out}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in1, in2}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
-TEST_F(TestDynamicTensorApplyTensorInfoBinaryOp, set_input_tensorinfo_after_compilation_add)
+TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_add)
{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_Add_unspecified_rank();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
// input reshaping to [2, 2, 3]
nnfw_tensorinfo input0_ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {2, 2, 3}};
std::vector<float> expected_output = {1.1 * 2, 2.1 * 2, 3.1 * 2, 4.1 * 2, 5.1 * 2, 6.1 * 2,
7.1 * 2, 8.1 * 2, 9.1 * 2, 10.1 * 2, 11.1 * 2, 12.1 * 2};
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
- set_input_output(_session, input0, input1, actual_output);
+ set_input_output(session, input0, input1, actual_output);
// Do inference
- NNFW_STATUS res = nnfw_run(_session);
+ NNFW_STATUS res = nnfw_run(session);
NNFW_ENSURE_SUCCESS(res);
// output value check
*
* @note Run this test with "cpu" backend
*/
-using TestDynamicTensorApplyTensorInfoUnaryOp = ValidationTestModelLoaded<NNPackages::NEG>;
-TEST_F(TestDynamicTensorApplyTensorInfoUnaryOp, set_input_tensorinfo_after_compilation_neg)
+auto build_model_buf_NEG()
{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ // Model is not important
+ CircleGen cgen;
+ int in = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorNeg({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
+
+TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_neg)
+{
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_NEG();
+ nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size());
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
nnfw_tensorinfo input0_ti_original = {NNFW_TYPE_TENSOR_FLOAT32, 2, {4, 4}};
expected_output[i] = -1 * input0[i];
}
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
// input shape check
{
nnfw_tensorinfo ti = {};
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(input0_ti_original, ti));
}
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
// input shape check
{
nnfw_tensorinfo ti = {};
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(input0_ti, ti));
}
- set_input_output(_session, input0, actual_output);
+ set_input_output(session, input0, actual_output);
// Do inference
- NNFW_STATUS res = nnfw_run(_session);
+ NNFW_STATUS res = nnfw_run(session);
NNFW_ENSURE_SUCCESS(res);
// output value check
set_input_output(_session, while_dynamic_input0, actual_output0);
- // TODO Change error code NNFW_STATUS_ERROR -> NNFW_INSUFFICIENT_OUTPUT_SIZE
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE);
}
using TestIfDynamicModelLoaded = ValidationTestModelLoaded<NNPackages::IF_DYNAMIC>;
"add", "add_no_manifest", "add_invalid_manifest",
// for dynamic tensor test
- "input_reshaping_add", "dynamic_tensor_reshape", "unknown_dim_input_concat",
- "add_unspecified_rank_inputs", "neg", "while_dynamic", "if_dynamic",
+ "input_reshaping_add", "dynamic_tensor_reshape", "while_dynamic", "if_dynamic",
};
NNPackages &NNPackages::get()
* @brief A helper class to find NN Packages for testing
* To add a nnpackage for your test, please do the followings:
* 0. Prerequisite: the actual file must be uploaded on the server
- * Add `config.sh` file to `tests/scripts/nnfw_api_gtest_models`
+ * Add `config.sh` file to `tests/scripts/models/nnfw_api_gtest`
* 1. Append an enum value to @c NNPackages::TestPackages
* 2. Append a string literal to @c TEST_PACKAGE_NAMES in the source file
*/
// for dynamic tensor test
INPUT_RESHAPING_ADD,
DYNAMIC_TENSOR_RESHAPE,
- UNKNOWN_DIM_INPUT_CONCAT,
- ADD_UNSPECIFIED_RANK_INPUTS,
- NEG,
WHILE_DYNAMIC,
IF_DYNAMIC,
#include "fixtures.h"
#include "NNPackages.h"
+#include <nnfw_internal.h>
+
+#include "CircleGen.h"
+
TEST_F(RegressionTest, github_1535)
{
auto package_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD);
NNFW_ENSURE_SUCCESS(nnfw_close_session(session1));
NNFW_ENSURE_SUCCESS(nnfw_close_session(session2));
+
+ SUCCEED();
+}
+
+TEST_F(RegressionTest, neg_github_3826)
+{
+ // Model is not important
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ auto cbuf = cgen.finish();
+
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+ // To test when there is no backends loaded for the session
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "unavailable_backend"));
+ ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
}
using ValidationTestAddModelLoaded = ValidationTestModelLoaded<NNPackages::ADD>;
-TEST_F(ValidationTestAddModelLoaded, prepare_001) { NNFW_ENSURE_SUCCESS(nnfw_prepare(_session)); }
+TEST_F(ValidationTestAddModelLoaded, prepare_001)
+{
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+
+ SUCCEED();
+}
TEST_F(ValidationTestAddModelLoaded, set_available_backends_001)
{
NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+
+ SUCCEED();
}
TEST_F(ValidationTestAddModelLoaded, get_input_size)
ASSERT_EQ(tensor_info.dims[0], 1);
}
+TEST_F(ValidationTestAddModelLoaded, input_output_tensorindex)
+{
+ uint32_t in_ind = 100;
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorindex(_session, "X_input", &in_ind));
+ ASSERT_EQ(in_ind, 0);
+
+ uint32_t out_ind = 100;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorindex(_session, "ADD_TOP", &out_ind));
+ ASSERT_EQ(out_ind, 0);
+}
+
TEST_F(ValidationTestAddModelLoaded, neg_run)
{
// nnfw_prepare is not called
// tensor_info is null
ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
+
+TEST_F(ValidationTestAddModelLoaded, neg_input_output_tensorindex)
+{
+ uint32_t in_ind = 100;
+ ASSERT_EQ(nnfw_input_tensorindex(_session, "ADD_TOP", &in_ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(in_ind, 100);
+ ASSERT_EQ(nnfw_input_tensorindex(_session, "y_var", &in_ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(in_ind, 100);
+
+ uint32_t out_ind = 100;
+ ASSERT_EQ(nnfw_output_tensorindex(_session, "X_input", &out_ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(out_ind, 100);
+}
{
NNFW_ENSURE_SUCCESS(nnfw_run(_objects[0].session));
NNFW_ENSURE_SUCCESS(nnfw_run(_objects[1].session));
+
+ SUCCEED();
}
TEST_F(ValidationTestFourAddModelsSetInput, run_002)
for (auto obj : _objects)
NNFW_ENSURE_SUCCESS(nnfw_run(obj.session));
}
+
+ SUCCEED();
}
TEST_F(ValidationTestFourAddModelsSetInput, run_async)
NNFW_ENSURE_SUCCESS(nnfw_run_async(obj.session));
for (auto obj : _objects)
NNFW_ENSURE_SUCCESS(nnfw_await(obj.session));
+
+ SUCCEED();
}
{
NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
+
+ SUCCEED();
}
TEST_F(ValidationTestSessionCreated, neg_load_session_1)
{
NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
+
+ SUCCEED();
}
TEST_F(ValidationTestSingleSession, query_info_u32)
{
uint32_t val = 0;
NNFW_ENSURE_SUCCESS(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &val));
+
+ SUCCEED();
}
TEST_F(ValidationTestSingleSession, neg_create_001)
#include <array>
#include <gtest/gtest.h>
-#include <nnfw.h>
+#include <nnfw_experimental.h>
#include "NNPackages.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Add_VarToConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 7, 4};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 3, 2, 4}}, {{6, 7, 9, 8}}});
+ _context->addTestCase({{{0, 1, 2, 3}}, {{5, 5, 9, 7}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 0, 7, 4, 0};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_AvgPool2D)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 3, 2, 4}}, {{2.5}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AvgPool2D)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Cos)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorCos({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ const float pi = 3.141592653589793;
+ _context->addTestCase({{{0, pi / 2, pi, 7}}, {{1, 0, -1, 0.75390225434}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cos_TwoOperand)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorCos({{lhs, rhs}, {out1, out2}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_L2Normalization)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorL2Normalization({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{0, 3, 4, 0, 5, 12, 0, 8, 15, 0, 7, 24}},
+ {{0, 0.6, 0.8, 0, 0.38461539149284363, 0.92307698726654053, 0,
+ 0.47058823704719543, 0.88235294818878174, 0, 0.28, 0.96}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
* limitations under the License.
*/
-#include "ir/operation/Quantize.h"
+#include "GenModelTest.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
+TEST_F(GenModelTest, OneOp_LeakyRelu)
{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorLeakyRelu({{in}, {out}}, 0.5);
+ cgen.setInputsAndOutputs({in}, {out});
-void Quantize::accept(OperationVisitor &v) const { v.visit(*this); }
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, -0.5, -1.0}}});
+ _context->setBackends({"acl_cl", "acl_neon"});
-Quantize::Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
+ SUCCEED();
}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Pad)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_PadV2)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 2, 3, 4}}, {{3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadRank)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim0)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim1)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// WORKAROUND Handle int32_t type input/output
+union float_int {
+ int32_t i;
+ float f;
+};
+
+TEST_F(GenModelTest, OneOp_Rank)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+ // TODO handle many type in addTestCase
+ float_int output_data;
+ output_data.i = 4;
+
+ cgen.addOperatorRank({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ {{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{output_data.f}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Rank_Int32)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+ // TODO handle many type in addTestCase
+ float_int output_data;
+ output_data.i = 4;
+
+ cgen.addOperatorRank({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ {{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{output_data.f}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_ResizeNearestNeighbor)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> size_data{3, 3};
+ uint32_t size_buf = cgen.addBuffer(size_data);
+ int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, size_buf});
+
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorResizeNearestNeighbor({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{3, 4, 6, 10, 9, 10, 12, 16}},
+ {{3, 4, 3, 4, 6, 10, 3, 4, 3, 4, 6, 10, 9, 10, 9, 10, 12, 16}}});
+ _context->setBackends({"acl_cl"});
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_While)
+{
+ // The model looks just like the below pseudocode
+ //
+ // function model(x)
+ // {
+ // while (x < 100.0)
+ // {
+ // x = x + 10.0;
+ // }
+ // return x
+ // }
+
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+ std::vector<float> end_data{100};
+ uint32_t end_buf = cgen.addBuffer(end_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in}, {x_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{0}}, {{100}}});
+ _context->addTestCase({{{2}}, {{102}}});
+ _context->addTestCase({{{22}}, {{102}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
file(GLOB MODEL_TEST_SCRIPT "models/run_test.sh")
install(PROGRAMS ${MODEL_TEST_SCRIPT} DESTINATION test/models)
-# Install models test list file
-file(GLOB MODEL_TEST_DIR models/config)
-install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models)
+# Install models test config
+file(GLOB TFLITE_CONFIG_DIR models/tflite)
+install(DIRECTORY ${TFLITE_CONFIG_DIR} DESTINATION test/models)
# Install nnpackage test config
-file(GLOB MODEL_TEST_DIR LIST_DIRECTORIES true nnfw_api_gtest/models/*)
-install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models/nnpackage)
+file(GLOB NNPACKAGE_MODEL_CONFIG_DIR models/nnfw_api_gtest)
+install(DIRECTORY ${NNPACKAGE_MODEL_CONFIG_DIR} DESTINATION test/models)
# Install test list
file(GLOB TEST_LIST_DIR list)
if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "nnpackage" ]]; then
# Download nnpackage model
- NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnpackage/
+ NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnfw_api_gtest/
NNPACKAGE_CACHE_DIR=$INSTALL_DIR/unittest_standalone/nnfw_api_gtest_models/
$INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK \
--configdir=$NNPACKAGE_CONFIG_DIR --cachedir=$NNPACKAGE_CACHE_DIR
exit $RET
fi
- local RESULT=`grep -E '^- Mean:' $LOG_FILE | sed -e 's/ms//g' | awk '{print $3}'`
+ local RESULT=`grep -E '^- MEAN ' $LOG_FILE | awk '{print $4}'`
echo "$RESULT"
}
-NET_000[0-5,7-9]
-NET_001[0,2-9]
-NET_002[0-2,4-9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_BiasAdd_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mean_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_RealDiv_*
-UNIT_Relu6_*
-UNIT_Relu_*
-UNIT_Reshape_*
-UNIT_Rsqrt_*
-UNIT_Softmax_*
-UNIT_Sqrt_*
-UNIT_SquaredDifference_*
-UNIT_Squeeze_*
-UNIT_Sub_*
-UNIT_Tanh_000
+Add_000.opt
+#ArgMax_000.opt
+#ArgMax_001.opt
+#ArgMax_002.opt
+#ArgMax_003.opt
+AveragePool2D_000.opt
+AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+#L2Normalize_U8_000.opt
+Logistic_000.opt
+#Logistic_U8_000.opt
+MaxPool2D_000.opt
+Mean_000.opt
+Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+Softmax_000.opt
+SpaceToDepth_U8_000.opt
+Split_000.opt
+#Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+Transpose_000.opt
+Unpack_000.opt
+Unpack_001.opt
+Unpack_002.opt
+#Unpack_003.opt
-NET_000[0-5,7-9]
-NET_001[0-9]
-NET_002[0-2,4-9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_BiasAdd_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mean_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_RealDiv_*
-UNIT_Relu6_*
-UNIT_Relu_*
-UNIT_Reshape_*
-UNIT_Rsqrt_*
-UNIT_Softmax_*
-UNIT_Sqrt_*
-UNIT_SquaredDifference_*
-UNIT_Squeeze_*
-UNIT_Sub_*
-UNIT_Tanh_000
+Add_000.opt
+#ArgMax_000.opt
+#ArgMax_001.opt
+#ArgMax_002.opt
+#ArgMax_003.opt
+AveragePool2D_000.opt
+AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+#DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+#L2Normalize_U8_000.opt
+Logistic_000.opt
+#Logistic_U8_000.opt
+MaxPool2D_000.opt
+Mean_000.opt
+Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+#Softmax_000.opt
+SpaceToDepth_U8_000.opt
+Split_000.opt
+#Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+Transpose_000.opt
+Unpack_000.opt
+Unpack_001.opt
+Unpack_002.opt
+#Unpack_003.opt
-NET_000[0-4,6-9]
-NET_001[0-3,6-9]
-NET_002[0-2,4-7,9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_Reshape_*
-UNIT_Softmax_*
-UNIT_Squeeze_*
-UNIT_Sub_*
+Add_000.opt
+ArgMax_000.opt
+ArgMax_001.opt
+ArgMax_002.opt
+ArgMax_003.opt
+AveragePool2D_000.opt
+AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+L2Normalize_U8_000.opt
+Logistic_000.opt
+Logistic_U8_000.opt
+MaxPool2D_000.opt
+Mean_000.opt
+Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+Softmax_000.opt
+SpaceToDepth_U8_000.opt
+Split_000.opt
+Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+Transpose_000.opt
+Unpack_000.opt
+Unpack_001.opt
+Unpack_002.opt
+Unpack_003.opt
+++ /dev/null
-NET_000[1,3,7-9]
-NET_001[6,9]
-NET_002[2,8]
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-NET_000[0-9]
-NET_001[0-4,6-9]
-NET_002[0-2,4-9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_BiasAdd_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_Reshape_*
-UNIT_Softmax_*
-UNIT_Squeeze_*
-UNIT_Sub_*
+Add_000.opt
+#ArgMax_000.opt
+#ArgMax_001.opt
+#ArgMax_002.opt
+#ArgMax_003.opt
+AveragePool2D_000.opt
+#AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+#L2Normalize_U8_000.opt
+Logistic_000.opt
+#Logistic_U8_000.opt
+MaxPool2D_000.opt
+#Mean_000.opt
+#Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+#Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+Softmax_000.opt
+#SpaceToDepth_U8_000.opt
+#Split_000.opt
+#Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+#Transpose_000.opt
+#Unpack_000.opt
+#Unpack_001.opt
+#Unpack_002.opt
+#Unpack_003.opt
MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
NNFW_HOME="$(dirname $(dirname $(dirname ${MY_PATH})))"
CACHE_ROOT_PATH=$MY_PATH/"cache"
-TEST_ROOT_PATH=$MY_PATH/"config"
+TEST_ROOT_PATH=$MY_PATH/"tflite"
REPORT_DIR="report"
RUN_DISABLED="true"
fi
# Check test driver setting
-if [ ! command_exists $DRIVER_BIN ] && [ "$RUN_TEST" = "on" ]; then
+if ! command_exists $DRIVER_BIN && [ "$RUN_TEST" = "on" ]; then
echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
exit 1
fi
+++ /dev/null
-#!/usr/bin/env bash
-
-# TODO Reuse the fuction in run_test.sh. This is its duplication.
-function need_download()
-{
- LOCAL_PATH=$1
- REMOTE_URL=$2
- if [ ! -e $LOCAL_PATH ]; then
- return 0;
- fi
- # Ignore checking md5 in cache
- if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
- return 1
- fi
-
- LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
- REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum | awk '{ print $1 }')
- # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
- if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
- echo "Downloaded file is outdated or incomplete."
- return 0
- fi
- return 1
-}
-
-# TODO Reuse the fuction in run_test.sh. This is its duplication.
-download_tests()
-{
- SELECTED_TESTS=$@
-
- echo ""
- echo "Downloading tests:"
- echo "======================"
- for TEST_NAME in $SELECTED_TESTS; do
- echo $TEST_NAME
- done
- echo "======================"
-
- for TEST_NAME in $SELECTED_TESTS; do
- # Test configure initialization
- MODELFILE_SERVER_PATH=""
- MODELFILE_NAME=""
- source $TEST_ROOT_PATH/$TEST_NAME/config.sh
-
- TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
- MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
- MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
- if [ -n "$FIXED_MODELFILE_SERVER" ]; then
- MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
- fi
-
- # Download model file
- if [ ! -e $TEST_CACHE_PATH ]; then
- mkdir -p $TEST_CACHE_PATH
- fi
-
- # Download unless we have it in cache (Also check md5sum)
- if need_download "$MODELFILE" "$MODELFILE_URL"; then
- echo ""
- echo "Download test file for $TEST_NAME"
- echo "======================"
-
- rm -f $MODELFILE # Remove invalid file if exists
- pushd $TEST_CACHE_PATH
- wget -nv $MODELFILE_URL
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
- unzip -o $MODELFILE_NAME
- rm *.zip
- fi
- popd
- fi
-
- done
-}
-
-realpath()
-{
- readlink -e -- "$@"
-}
-
-usage()
-{
- echo "Usage: $0 --modelfile-server=MODELFILE_SERVER --install-path=INSTALL_DIR"
- echo " MODELFILE_SERVER : Base URL of the model file server"
- echo " INSTALL_DIR : Path to be installed"
- exit 1
-}
-
-while [[ $# -gt 0 ]]
-do
- key="$(echo $1 | awk '{print tolower($0)}')"
- case "$key" in
- -?|-h|--help)
- usage
- exit 1
- ;;
- --modelfile-server)
- MODELFILE_SERVER="$2"
- shift
- ;;
- --modelfile-server=*)
- MODELFILE_SERVER="${1#*=}"
- ;;
- --install-dir)
- INSTALL_DIR="$2"
- shift
- ;;
- --install-dir=*)
- INSTALL_DIR="${1#*=}"
- ;;
- *)
- echo "Invalid option '$1'"
- usage
- exit 1
- ;;
- esac
- shift
-done
-
-if [ -z "$MODELFILE_SERVER" ]; then
- echo "Please specify a value for --modelfile-server or MODELFILE_SERVER(env)."
- usage
- exit 1
-fi
-
-if [ -z "$INSTALL_DIR" ]; then
- echo "Please specify a value for --install-dir or INSTALL_DIR(env)."
- usage
- exit 1
-fi
-
-set -e
-
-THIS_SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE}))
-source ${THIS_SCRIPT_DIR}/../common.sh
-
-CACHE_ROOT_PATH=$INSTALL_DIR
-FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
-TEST_ROOT_PATH=${THIS_SCRIPT_DIR}/models
-
-# All models in the directory are the target models
-pushd ${TEST_ROOT_PATH}
-MODELS=$(ls -d */)
-popd
-
-download_tests $MODELS
-
-set +e
print_with_dots "$EXECUTOR $BACKEND without scheduler"
- RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
printf -v RESULT_INT '%d' $RESULT 2>/dev/null
PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
export GRAPH_DOT_DUMP=1
print_with_dots "Parallel with scheduler"
- RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
echo "$RESULT ms"
printf -v RESULT_SCH_INT '%d' $RESULT 2>/dev/null
- mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_parallel.dot"
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_parallel.dot"
##################################################################################
# Run Linear executor with scheduler
export GRAPH_DOT_DUMP=1
print_with_dots "Linear with scheduler"
- RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
printf -v RESULT_INT '%d' $RESULT 2>/dev/null
PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
# for operations with input&output sizes the same as the model
mv "exec_time.json" $REPORT_MODEL_DIR
# Save the dot graph
- mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_linear.dot"
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_linear.dot"
unset GRAPH_DOT_DUMP
##################################################################################
return ret;
}
-// param shape_str is a form of, e.g., "[1, [2, 3], 3, []]"
-void handleShapeParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str)
+// param shape_str is a form of, e.g., "[1, [2, 3], 3, []]" or "h5"
+void handleShapeJsonParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str)
{
Json::Value root;
Json::Reader reader;
};
auto process_shape_prepare = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (shape_str == "H5" || shape_str == "h5")
+ {
+ _when_to_use_h5_shape = WhenToUseH5Shape::PREPARE;
+ return;
+ }
+#endif
try
{
- handleShapeParam(_shape_prepare, shape_str);
+ handleShapeJsonParam(_shape_prepare, shape_str);
}
catch (const std::exception &e)
{
};
auto process_shape_run = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (shape_str == "H5" || shape_str == "h5")
+ {
+ _when_to_use_h5_shape = WhenToUseH5Shape::RUN;
+ return;
+ }
+#endif
try
{
- handleShapeParam(_shape_run, shape_str);
+ handleShapeJsonParam(_shape_run, shape_str);
}
catch (const std::exception &e)
{
"e.g. nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
"{nnpkg} name may be changed to realpath if you use symbolic-link.")
("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
- "set shape of specified tensor before compilation\n"
- "e.g. '[0, [1, 2], 2, []]' to set 0th tensor to [1, 2] and 2nd tensor to [].\n")
+ "set shape of specified tensor before compilation (before calling nnfw_prepare()).\n"
+ "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
+ "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
- "set shape of specified tensor right before running\n"
- "e.g. '[1, [1, 2]]` to set 1st tensor to [1, 2].\n")
+ "set shape of specified tensor before running (before calling nnfw_run()).\n"
+ "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
+ "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
"Verbose level\n"
"0: prints the only result. Messages btw run don't print\n"
#include <vector>
#include <boost/program_options.hpp>
+#include "types.h"
+
namespace po = boost::program_options;
namespace nnpkg_run
{
-using TensorShapeMap = std::unordered_map<uint32_t, std::vector<int>>;
+using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+enum class WhenToUseH5Shape
+{
+ DO_NOT_USE, // don't use shapes in h5 file
+ PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
+ RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
+};
+#endif
class Args
{
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
const std::string &getDumpFilename(void) const { return _dump_filename; }
const std::string &getLoadFilename(void) const { return _load_filename; }
+ WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
#endif
const int getNumRuns(void) const { return _num_runs; }
const int getWarmupRuns(void) const { return _warmup_runs; }
const bool getMemoryPoll(void) const { return _mem_poll; }
const bool getWriteReport(void) const { return _write_report; }
const bool printVersion(void) const { return _print_version; }
- const TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
- const TensorShapeMap &getShapeMapForRun() { return _shape_run; }
+ TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
+ TensorShapeMap &getShapeMapForRun() { return _shape_run; }
const int getVerboseLevel(void) const { return _verbose_level; }
private:
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
std::string _dump_filename;
std::string _load_filename;
+ WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::DO_NOT_USE;
#endif
TensorShapeMap _shape_prepare;
TensorShapeMap _shape_run;
#include <stdexcept>
#include <H5Cpp.h>
+namespace
+{
+nnpkg_run::TensorShape getShape(H5::DataSet &data_set)
+{
+ std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long
+ H5::DataSpace data_space = data_set.getSpace();
+ int rank = data_space.getSimpleExtentNdims();
+ h5_shape.resize(rank);
+
+ // read shape info from H5 file
+ data_space.getSimpleExtentDims(h5_shape.data(), NULL);
+
+ nnpkg_run::TensorShape shape;
+ for (auto dim : h5_shape)
+ shape.emplace_back(static_cast<int>(dim));
+
+ return shape;
+}
+} // namespace
+
namespace nnpkg_run
{
static const char *h5_value_grpname = "value";
+std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
+{
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+ std::vector<TensorShape> tensor_shapes;
+
+ try
+ {
+ H5::Exception::dontPrint();
+
+ H5::H5File file(filename, H5F_ACC_RDONLY);
+ H5::Group value_group = file.openGroup(h5_value_grpname);
+
+ // Constraints: if there are n data set names, they should be unique and
+ // one of [ "0", "1", .. , "n-1" ]
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+ H5::DataType type = data_set.getDataType();
+ auto shape = getShape(data_set);
+
+ tensor_shapes.emplace_back(shape);
+ }
+
+ return tensor_shapes;
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ std::exit(-1);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+
void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
{
uint32_t num_inputs;
{
nnfw_tensorinfo ti;
NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+ // TODO Add Assert(nnfw shape, h5 file shape size)
+
// allocate memory for data
auto bufsz = bufsize_for(&ti);
inputs[i].alloc(bufsz);
case NNFW_TYPE_TENSOR_BOOL:
{
H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
break;
}
#include <string>
#include <vector>
+#include "types.h"
#include "allocation.h"
struct nnfw_session;
{
public:
H5Formatter(nnfw_session *sess) : session_(sess) {}
+ std::vector<TensorShape> readTensorShapes(const std::string &filename);
void loadInputs(const std::string &filename, std::vector<Allocation> &inputs);
void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs);
#include <unordered_map>
#include <vector>
-static const char *default_backend_cand = "acl_cl";
+static const char *default_backend_cand = "cpu";
-NNFW_STATUS resolve_op_backend(nnfw_session *session)
+void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
+ std::vector<nnpkg_run::TensorShape> shapes)
{
- static std::unordered_map<std::string, std::string> operation_map = {
- {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
- {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
- {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
- {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
-
- for (auto i : operation_map)
- {
- char *default_backend = std::getenv(i.second.c_str());
- if (default_backend)
- {
- NNFW_STATUS return_result = nnfw_set_op_backend(session, i.first.c_str(), default_backend);
- if (return_result == NNFW_STATUS_ERROR)
- return return_result;
- }
- }
-
- return NNFW_STATUS_NO_ERROR;
+ for (uint32_t i = 0; i < shapes.size(); i++)
+ shape_map[i] = shapes[i];
}
int main(const int argc, char **argv)
char *available_backends = std::getenv("BACKENDS");
if (available_backends)
NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
- NNPR_ENSURE_STATUS(resolve_op_backend(session));
uint32_t num_inputs;
NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
verifyInputTypes();
verifyOutputTypes();
- // set input shape before compilation
+// set input shape before compilation
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
+ {
+ auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
+ overwriteShapeMap(args.getShapeMapForPrepare(), shapes);
+ }
+#endif
setTensorInfo(args.getShapeMapForPrepare());
// prepare execution
NNPR_ENSURE_STATUS(nnfw_prepare(session));
});
- // set input shape after compilation and before execution
+// set input shape after compilation and before execution
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN)
+ {
+ auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
+ overwriteShapeMap(args.getShapeMapForRun(), shapes);
+ }
+#endif
setTensorInfo(args.getShapeMapForRun());
// prepare input
* limitations under the License.
*/
-#include "CircleExpContract.h"
+#ifndef __NNPACKAGE_RUN_TYPES_H__
+#define __NNPACKAGE_RUN_TYPES_H__
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+namespace nnpkg_run
{
- if (!ptr)
- INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
- std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
- fs.write(ptr, size);
+using TensorShape = std::vector<int>;
+
+} // end of namespace nnpkg_run
- return fs.good();
-}
+#endif // __NNPACKAGE_RUN_TYPES_H__
// Generate unsigned 8-bit integer input
auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
- uint8_t value = 0;
+ auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<uint8_t>);
+ const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(),
+ std::bind(fp, randgen, _1, _2));
nnfw::misc::tensor::iterate(tensor_view.shape())
<< [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
+ const auto value = data.at(ind);
tensor_view.at(ind) = value;
- value = (value + 1) & 0xFF;
};
}
else if (tensor->type == kTfLiteBool)
Options:
-h show this help
-o set nnpackage output directory (default=.)
+ -p set nnpackage output name (default=[modelfile name])
Examples:
- model2nnpkg.sh add.tflite => create nnpackage in ./
- model2nnpkg.sh -o out add.tflite => create nnpackage in out/
+ model2nnpkg.sh add.tflite => create nnpackage 'add' in ./
+ model2nnpkg.sh -o out add.tflite => create nnpackage 'add' in out/
+ model2nnpkg.sh -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/
```
progname=$(basename "${BASH_SOURCE[0]}")
outdir="."
+name=""
usage() {
echo "Usage: $progname [options] modelfile"
echo "Options:"
echo " -h show this help"
echo " -o set nnpackage output directory (default=$outdir)"
+ echo " -p set nnpackage output name (default=[modelfile name])"
echo ""
echo "Examples:"
- echo " $progname add.tflite => create nnpackage in $outdir/"
- echo " $progname -o out add.tflite => create nnpackage in out/"
+ echo " $progname add.tflite => create nnpackage 'add' in $outdir/"
+ echo " $progname -o out add.tflite => create nnpackage 'add' in out/"
+ echo " $progname -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/"
exit 1
}
exit 1
fi
-while getopts "ho:" OPTION; do
+while getopts "ho:p:" OPTION; do
case "${OPTION}" in
h) usage;;
o) outdir=$OPTARG;;
+ p) name=$OPTARG;;
?) exit 1;;
esac
done
exit 1
fi
-name=${modelfile%.*}
+if [ -z "$name" ]; then
+ name=${modelfile%.*}
+fi
extension=${modelfile##*.}
echo "Generating nnpackage "$name" in "$outdir""
"major-version" : "1",
"minor-version" : "0",
"patch-version" : "0",
- "models" : [ "$name.$extension" ],
+ "models" : [ "$modelfile" ],
"model-types" : [ "$extension" ]
}
EOF
nnfw_root="$( cd "${script_dir%*/*/*/*}" && pwd )"
outdir="."
flatc=${flatc:-"$nnfw_root/build/externals/FLATBUFFERS/build/flatc"}
-tflite_schema=${tflite_schema:-"$nnfw_root/externals/TENSORFLOW-1.12/tensorflow/contrib/lite/schema/schema.fbs"}
+tflite_schema=${tflite_schema:-"$nnfw_root/externals/TENSORFLOW-1.13.1/tensorflow/lite/schema/schema.fbs"}
circle_schema=${circle_schema:-"$nnfw_root/nnpackage/schema/circle_schema.fbs"}
if ! [ -x "$flatc" ]; then
# convert
mkdir -p "${outdir}"
-${flatc} -o ${outdir} --defaults-json --strict-json -t ${tflite_schema} -- $1
+${flatc} -o ${outdir} --strict-json -t ${tflite_schema} -- $1
${script_dir}/tflitejson2circlejson.py "${outdir}/${name}.json" > "${outdir}/${name}.circle"
${flatc} -o ${outdir} -b ${circle_schema} "${outdir}/${name}.circle"
rm -f ${outdir}/${name}.json
with open(json_path, "r") as f:
try:
json_dict = json.load(f, object_pairs_hook=OrderedDict)
- for subgraph in json_dict["subgraphs"]:
- subgraph["data_format"] = "CHANNELS_LAST"
json_dict["version"] = 0
print(json.dumps(json_dict, indent=2))
except KeyError:
--- /dev/null
+# Content
+
+- git_release.sh
+- onert_version.sh
+
+# git_release.sh
+
+This tool helps you to automate GitHub releases.
+
+## Usage
+```
+$ ./git_release.sh --tag TAG --release_note RELEASE_NOTE \
+--token TOKEN [--release_name RELEASE_NAME] [--commitish COMMITISH] [--draft] \
+[--host_name HOST_NAME] [--repo_owner REPO_OWNER] [--repo_name REPO_NAME] [--asset] ...
+```
+
+## Options
+```
+--tag The name of the tag
+--release_name The name of the release
+--release_note Path of text file describing the contents of the release
+--commitish The commitish value that determines where the Git tag is created from
+--draft Create a draft release
+--token User token for authentication
+--host_name Host name for endpoint URL [Enterprise-specific endpoint only]
+--repo_owner Owner of the repository
+--repo_name The name of the repository
+--asset Path of release asset
+```
+
+## Examples
+```
+$ ./git_release.sh --tag 1.9.0 --commitish release/1.9.0 --token 0de25f1ca5d1d758fe877b18c06 \
+ --repo_owner mhs4670go --repo_name test_repo --release_note local/repo/release_note \
+ --asset ONE-compiler.tar.gz --asset ONE-runtime.tar.gz
+
+$ ./git_release.sh --tag v1.1 --commitish c024e85d0ce6cb1ed2fbc66f1a9c1c2814da7575 \
+ --token 0de25f1ca5d1d758fe877b18c06 --repo_owner Samsung --repo_name ONE \
+ --release_name "Release Automation" --release_note /home/mhs4670go/ONE/release_doc \
+ --host_name github.sec.company.net --draft
+```
+
+## Reference
+https://developer.github.com/v3/repos/releases/#create-a-release
+
+
+# onert_version.sh
+
+onert_version.sh updates version information.
+
+## Usage
+```
+$ ./onert_version.sh -h
+Usage: onert_version.sh version
+Update or show onert version information
+```
+
+## Options
+```
+-h show this help
+-s set onert version
+```
+
+## Examples
+```
+$ ./onert_version.sh => show current onert version
+$ ./onert_version.sh -s 1.6.0 => set onert version info in all sources
+```
--- /dev/null
+#!/bin/bash
+# This script is to automate the process of monthly release with github API
+
+# Test if getopt is enhanced version
+getopt --test > /dev/null
+if [ $? -ne 4 ]; then
+ echo "[ERROR] Your system doesn't have enhanced getopt"
+ exit 2
+fi
+
+function Usage()
+{
+ echo "Usage: ./$(basename ${BASH_SOURCE[0]}) --tag TAG --release_note RELEASE_NOTE \
+--token TOKEN [--release_name RELEASE_NAME] [--commitish COMMITISH] [--draft] \
+[--host_name HOST_NAME] [--repo_owner REPO_OWNER] [--repo_name REPO_NAME] [--asset] ..."
+ echo ""
+ echo "[OPTIONS]"
+ echo "--tag The name of the tag"
+ echo "--release_name The name of the release"
+ echo "--release_note Path of text file describing the contents of the release"
+ echo "--commitish The commitish value that determines where the Git tag is created from"
+ echo "--draft Create a draft release"
+ echo "--token User token for authentication"
+ echo "--host_name Host name for endpoint URL [Enterprise-specific endpoint only]"
+ echo "--repo_owner Owner of the repository"
+ echo "--repo_name The name of the repository"
+ echo "--asset Path of release asset"
+ echo ""
+ echo "[EXAMPLE]"
+ echo "$ ./git_release.sh --tag 1.9.0 --commitish release/1.9.0 --token 0de25f1ca5d1d758fe877b18c06 \\"
+ echo " --repo_owner mhs4670go --repo_name test_repo --release_note local/repo/release_note \\"
+ echo " --asset ONE-compiler.tar.gz --asset ONE-runtime.tar.gz"
+ echo ""
+ echo "$ ./git_release.sh --tag v1.1 --commitish c024e85d0ce6cb1ed2fbc66f1a9c1c2814da7575 \\"
+ echo " --token 0de25f1ca5d1d758fe877b18c06 --repo_owner Samsung --repo_name ONE \\"
+ echo " --release_name \"Release Automation\" --release_note /home/mhs4670go/ONE/release_doc \\"
+ echo " --host_name github.sec.company.net --draft"
+ echo ""
+ echo "[REFERENCE]"
+ echo "https://developer.github.com/v3/repos/releases/#create-a-release"
+
+}
+
+SHORT_OPTS=h
+LONG_OPTS="\
+help,\
+tag:,\
+release_name:,\
+release_note:,\
+commitish:,\
+draft,\
+token:,\
+host_name:,\
+repo_owner:,\
+repo_name:,\
+asset:"
+
+OPTS=$(getopt --options "$SHORT_OPTS" --longoptions "$LONG_OPTS" --name "$0" -- "$@")
+
+if [ $? != 0 ] ; then echo "[ERROR] Failed to parse options" ; exit 2 ; fi
+
+eval set -- "$OPTS"
+
+unset TAG_NAME
+unset RELEASE_NAME
+unset RELEASE_NOTE
+unset TARGET_COMMITISH
+unset USER_TOKEN
+unset HOST_NAME
+unset REPO_OWNER
+unset REPO_NAME
+IS_DRAFT=false
+ASSET_PATHS=()
+
+while true ; do
+ case "$1" in
+ -h|--help )
+ Usage
+ exit 0
+ ;;
+ --tag ) # REQUIRED
+ TAG_NAME="$2"
+ shift 2
+ ;;
+ --release_name )
+ RELEASE_NAME="$2"
+ shift 2
+ ;;
+ --release_note ) # REQUIRED
+ RELEASE_NOTE="$2"
+ shift 2
+ ;;
+ --commitish )
+ TARGET_COMMITISH="$2"
+ shift 2
+ ;;
+ --draft )
+ IS_DRAFT=true
+ shift
+ ;;
+ --token ) # REQUIRED
+ USER_TOKEN="$2"
+ shift 2
+ ;;
+ --host_name )
+ HOST_NAME="$2/api/v3"
+ shift 2
+ ;;
+ --repo_owner )
+ REPO_OWNER="$2"
+ shift 2
+ ;;
+ --repo_name )
+ REPO_NAME="$2"
+ shift 2
+ ;;
+ --asset )
+ ASSET_PATHS+=("$2")
+ shift 2
+ ;;
+ -- )
+ shift
+ break
+ ;;
+ *)
+ echo "[ERROR] getopt internal error"
+ exit 2
+ ;;
+ esac
+done
+
+# Check if required options are specified
+if [ -z "${TAG_NAME}" ]; then
+ echo "[ERROR] You must specify '--tag' option"
+ Usage
+ exit 2
+fi
+if [ -z "${RELEASE_NOTE}" ]; then
+ echo "[ERROR] You must specify '--release_note' option"
+ Usage
+ exit 2
+fi
+if [ -z "${USER_TOKEN}" ]; then
+ echo "[ERROR] You must specify '--token' option"
+ Usage
+ exit 2
+fi
+
+# Print variables and set default value
+DEFAULT_RELEASE_NAME="ONE Release ${TAG_NAME}"
+DEFAULT_HOST_NAME="api.github.com"
+DEFAULT_REPO_OWNER="Samsung"
+DEFAULT_REPO_NAME="ONE"
+echo "======================[RELEASE INFO]======================"
+echo "TAG_NAME : ${TAG_NAME}"
+echo "RELEASE_NAME : ${RELEASE_NAME:=${DEFAULT_RELEASE_NAME}}"
+echo "RELEASE_NOTE : ${RELEASE_NOTE}"
+echo "TARGET_COMMITISH : ${TARGET_COMMITISH:=${TAG_NAME}}"
+echo "IS_DRAFT : ${IS_DRAFT}"
+echo "USER_TOKEN : ${USER_TOKEN}"
+echo "HOST_NAME : ${HOST_NAME:=${DEFAULT_HOST_NAME}}"
+echo "REPO_OWNER : ${REPO_OWNER:=${DEFAULT_REPO_OWNER}}"
+echo "REPO_NAME : ${REPO_NAME:=${DEFAULT_REPO_NAME}}"
+echo "ASSETS : ${ASSET_PATHS[@]}"
+echo "==========================================================="
+
+function generate_release_data()
+{
+ cat <<EOF
+{
+ "tag_name": "${TAG_NAME}",
+ "target_commitish": "${TARGET_COMMITISH}",
+ "name": "${RELEASE_NAME}",
+ "body": "$(cat $1 | sed 's/$/\\n/' | tr -d '\n')",
+ "draft": ${IS_DRAFT},
+ "prerelease": false
+}
+EOF
+}
+
+# Check if the release already exists
+RELEASE_URL=$(curl -s --request GET --header "Authorization: token ${USER_TOKEN}" \
+https://${HOST_NAME}/repos/${REPO_OWNER}/${REPO_NAME}/releases/tags/${TAG_NAME} | \
+jq -r '.url')
+
+if [ "$RELEASE_URL" != null ]; then
+ echo "[ERROR] The tag name you specified already exists."
+ exit 2
+fi
+
+# Create a release (with assinging upload_url using jq)
+UPLOAD_URL=$(curl -s --request POST --header "Authorization: token ${USER_TOKEN}" \
+--header "Accept: application/json" \
+--data "$(eval generate_release_data '${RELEASE_NOTE}')" \
+"https://${HOST_NAME}/repos/${REPO_OWNER}/${REPO_NAME}/releases" | \
+jq -r '.upload_url')
+
+UPLOAD_URL=$(echo ${UPLOAD_URL} | cut -d "{" -f 1)?name=
+
+# Upload the assets
+for ASSET_PATH in "${ASSET_PATHS[@]}"; do
+ curl -s --request POST --header "Authorization: token ${USER_TOKEN}" \
+ --header "Content-Type: $(file -b --mime-type ${ASSET_PATH})" \
+ --data-binary @${ASSET_PATH} \
+ ${UPLOAD_URL}$(basename ${ASSET_PATH}) > /dev/null
+done
--- /dev/null
+#!/bin/bash
+
+set -eu
+
+progname=$(basename "${BASH_SOURCE[0]}")
+script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+nnfw_root="$( cd "${script_dir%*/*/*}" && pwd )"
+
+usage() {
+ echo "Usage: $progname version"
+ echo "Update or show onert version information"
+ echo ""
+ echo "Options:"
+ echo " -h show this help"
+ echo " -s set onert version"
+ echo ""
+ echo "Examples:"
+ echo " $progname => show current onert version"
+ echo " $progname -s 1.6.0 => set onert version info in all sources"
+ exit 1
+}
+
+show_version() {
+ version_line=$(cat ${nnfw_root}/packaging/nnfw.spec | grep "Version:")
+ echo ${version_line#"Version:"}
+
+ exit 0
+}
+
+set_version() {
+ version=$1
+ perl -pi -e "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
+ perl -pi -e "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
+
+ IFS=. read M m p <<< "$version"
+ hex=$(printf '0x%08x' $(( (($M << 24)) | (($m << 8)) | $p )))
+ perl -pi -e "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
+
+ perl -pi -e "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle
+}
+
+if [ $# -eq 0 ]; then
+ show_version
+fi
+
+while getopts "hs:" OPTION; do
+case "${OPTION}" in
+ h) usage;;
+ s) set_version "$OPTARG";;
+ ?) exit 1;;
+esac
+done
+
+shift $((OPTIND-1))
import os
import sys
import numpy
-
-sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tflite'))
-flatbuffersPath = '../../externals/flatbuffers'
-sys.path.append(
- os.path.join(os.path.dirname(os.path.abspath(__file__)), flatbuffersPath + '/python'))
-
import flatbuffers
import tflite.Model
import tflite.SubGraph
--- /dev/null
+flatbuffers>=1.12
+numpy
import os
import sys
import numpy
-
-sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tflite'))
-sys.path.append(
- os.path.join(
- os.path.dirname(os.path.abspath(__file__)), '../../externals/flatbuffers/python'))
-
import flatbuffers
import tflite.Model
import tflite.SubGraph
conv2d_options.StrideW())
tflite.Conv2DOptions.Conv2DOptionsAddStrideH(new_builder,
conv2d_options.StrideH())
+ tflite.Conv2DOptions.Conv2DOptionsAddDilationWFactor(
+ new_builder, conv2d_options.DilationWFactor())
+ tflite.Conv2DOptions.Conv2DOptionsAddDilationHFactor(
+ new_builder, conv2d_options.DilationHFactor())
tflite.Conv2DOptions.Conv2DOptionsAddFusedActivationFunction(
new_builder, conv2d_options.FusedActivationFunction())
return tflite.Conv2DOptions.Conv2DOptionsEnd(new_builder)
# GreaterOptions: not supported
# GreaterEqualOptions: not supported
# LessEqualOptions: not supported
- # SelectOptions: not supported
+
+ # SelectOptions
+ import tflite.SelectOptions
+ if builtin_option_type == tflite.BuiltinOptions.BuiltinOptions().SelectOptions:
+
+ select_option = tflite.SelectOptions.SelectOptions()
+ select_option.Init(selected_builtin_option.Bytes, selected_builtin_option.Pos)
+
+ tflite.SelectOptions.SelectOptionsStart(new_builder)
+ return tflite.SelectOptions.SelectOptionsEnd(new_builder)
+
# SliceOptions: not supported
# TransposeConvOptions
# FloorModOptions: not supported
# RangeOptions: not supported
# ResizeNearestNeighborOptions: not supported
- # LeakyReluOptions: not supported
+
+ # LeakyReluOptions
+ import tflite.LeakyReluOptions
+ if builtin_option_type == tflite.BuiltinOptions.BuiltinOptions().LeakyReluOptions:
+
+ leaky_relu_option = tflite.LeakyReluOptions.LeakyReluOptions()
+ leaky_relu_option.Init(selected_builtin_option.Bytes, selected_builtin_option.Pos)
+
+ tflite.LeakyReluOptions.LeakyReluOptionsStart(new_builder)
+ tflite.LeakyReluOptions.LeakyReluOptionsAddAlpha(new_builder,
+ leaky_relu_option.Alpha())
+ return tflite.LeakyReluOptions.LeakyReluOptionsEnd(new_builder)
# SquaredDifferenceOptions
import tflite.SquaredDifferenceOptions
return tflite.WhileOptions.WhileOptionsEnd(new_builder)
# Cannot handle builtin option type yet
- print("Cannot handle this option yet")
+ print("Cannot handle BuiltinOptions {} yet. See BuiltinOptions.py for op name".format(
+ builtin_option_type))
exit(1)
+++ /dev/null
-#!/bin/bash
-
-set -eu
-
-progname=$(basename "${BASH_SOURCE[0]}")
-script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-nnfw_root="$( cd "${script_dir%*/*/*}" && pwd )"
-
-usage() {
- echo "Usage: $progname version"
- echo "Update all version information"
- echo ""
- echo "Options:"
- echo " -h show this help"
- echo ""
- echo "Examples:"
- echo " $progname 1.6.0"
- exit 1
-}
-
-if [ $# -eq 0 ]; then
- echo "For help, type $progname -h"
- exit 1
-fi
-
-while getopts "ho:" OPTION; do
-case "${OPTION}" in
- h) usage;;
- ?) exit 1;;
-esac
-done
-
-shift $((OPTIND-1))
-
-if [ $# -ne 1 ]; then
- echo "error: wrong argument (no argument or too many arguments)."
- echo "For help, type $progname -h"
- exit 1
-fi
-
-version=$1
-
-perl -pi -e "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
-
-perl -pi -e "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
-
-IFS=. read M m p <<< "$version"
-hex=$(printf '0x%08x' $(( (($M << 24)) | (($m << 8)) | $p )))
-perl -pi -e "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
-
-perl -pi -e "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle