* limitations under the License.
*/
-#ifndef __NNFW_CKER_ADD_H__
-#define __NNFW_CKER_ADD_H__
+#ifndef __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__
+#define __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__
+#include <functional>
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
namespace cker
{
-struct AddParam
+struct BinaryArithmeticOpParam
{
// Shape dependent / common to data / op types.
// BroadcastableOpCategory broadcast_category;
};
template <typename T>
-inline void Add(const AddParam ¶ms, const Shape &input1_shape, const T *input1_data,
- const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
- T *output_data)
+inline void BinaryArithmeticOp(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape,
+ const T *input2_data, const Shape &output_shape, T *output_data,
+ const std::function<T(const T &, const T &)> &fn)
{
const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i)
{
- output_data[i] = ActivationFunctionWithMinMax(input1_data[i] + input2_data[i],
+ output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
params.quantized_activation_min,
params.quantized_activation_max);
}
}
template <>
-inline void Add(const AddParam ¶ms, const Shape &input1_shape, const float *input1_data,
- const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
- float *output_data)
+inline void BinaryArithmeticOp(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
+ const float *input1_data, const Shape &input2_shape,
+ const float *input2_data, const Shape &output_shape,
+ float *output_data,
+ const std::function<float(const float &, const float &)> &fn)
{
const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < size; i++)
{
- auto x = input1_data[i] + input2_data[i];
output_data[i] =
- ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
+ ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
+ params.float_activation_min, params.float_activation_max);
}
}
} // namespace cker
} // namespace nnfw
-#endif // __NNFW_CKER_ADD_H__
+#endif // __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_CKER_SUB_H__
-#define __NNFW_CKER_SUB_H__
-
-#include "cker/Shape.h"
-#include "cker/Types.h"
-#include "cker/Utils.h"
-
-namespace nnfw
-{
-namespace cker
-{
-
-struct SubParam
-{
- // Shape dependent / common to data / op types.
- // BroadcastableOpCategory broadcast_category;
- // uint8 inference params.
- int32_t input1_offset;
- int32_t input2_offset;
- int32_t output_offset;
- int32_t output_multiplier;
- int32_t output_shift;
- // Sub / Sub, not Mul, uint8 inference params.
- int32_t left_shift;
- int32_t input1_multiplier;
- int32_t input1_shift;
- int32_t input2_multiplier;
- int32_t input2_shift;
- // uint8, etc, activation params.
- int32_t quantized_activation_min;
- int32_t quantized_activation_max;
- // float activation params.
- float float_activation_min;
- float float_activation_max;
-
- // Processed output dimensions.
- // Let input "a" be the one that broadcasts in the faster-changing dimension.
- // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and
- // {b0, b1, b2, b3, b4},
- // broadcast_shape[4] = b0 = a0.
- // broadcast_shape[3] = b1; a1 = 1.
- // broadcast_shape[2] = b2 = a2.
- // broadcast_shape[1] = a3; b3 = 1.
- // broadcast_shape[0] = b4 = a4.
- // int broadcast_shape[5];
-};
-
-template <typename T>
-inline void Sub(const SubParam ¶ms, const Shape &input1_shape, const T *input1_data,
- const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
- T *output_data)
-{
- const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
- for (int i = 0; i < flat_size; ++i)
- {
- output_data[i] = ActivationFunctionWithMinMax(input1_data[i] - input2_data[i],
- params.quantized_activation_min,
- params.quantized_activation_max);
- }
-}
-
-template <>
-inline void Sub(const SubParam ¶ms, const Shape &input1_shape, const float *input1_data,
- const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
- float *output_data)
-{
- const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
- for (int i = 0; i < size; i++)
- {
- auto x = input1_data[i] - input2_data[i];
- output_data[i] =
- ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
- }
-}
-
-} // namespace cker
-} // namespace nnfw
-
-#endif // __NNFW_CKER_SUB_H__
#include "kernel/DepthwiseConvolutionLayer.h"
#include "kernel/AddLayer.h"
#include "kernel/SubLayer.h"
+#include "kernel/MulLayer.h"
#include "kernel/GatherLayer.h"
#include "kernel/LogisticLayer.h"
#include "kernel/PadLayer.h"
_execution_builder->append(std::move(fn));
}
-void KernelGenerator::visit(const model::operation::Mul &) { throw std::runtime_error("NYI"); }
-
void KernelGenerator::visit(const model::operation::Reshape &node)
{
const auto output_index{node.getOutputs().at(0)};
void KernelGenerator::visit(const model::operation::Sub &node)
{
+ // The same as Add
const auto ofm_index{node.getOutputs().at(0)};
const auto lhs_index{node.getInputs().at(model::operation::Sub::Input::LHS)};
const auto rhs_index{node.getInputs().at(model::operation::Sub::Input::RHS)};
_execution_builder->append(std::move(fn));
}
+void KernelGenerator::visit(const model::operation::Mul &node)
+{
+ // The same as Add
+ const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(model::operation::Mul::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(model::operation::Mul::Input::RHS)};
+
+ const auto ofm_backend_descr =
+ ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
+ const auto lhs_backend_descr =
+ ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(lhs_index), _current_subg_layout);
+ const auto rhs_backend_descr =
+ ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(rhs_index), _current_subg_layout);
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::MulLayer>();
+
+ fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr,
+ activation, ofm_alloc->buffer(), ofm_backend_descr);
+
+ _execution_builder->append(std::move(fn));
+}
+
void KernelGenerator::visit(const model::operation::Permute &node)
{
const auto output_index{node.getOutputs().at(0)};
void visit(const model::operation::AvgPool2D &) override;
void visit(const model::operation::Concat &) override;
void visit(const model::operation::FullyConnected &) override;
- void visit(const model::operation::Mul &) override;
void visit(const model::operation::Reshape &) override;
void visit(const model::operation::Squeeze &) override;
void visit(const model::operation::Softmax &) override;
void visit(const model::operation::Add &) override;
void visit(const model::operation::Sub &) override;
+ void visit(const model::operation::Mul &) override;
void visit(const model::operation::Permute &) override;
void visit(const model::operation::Gather &) override;
void visit(const model::operation::Custom &node) override;
#include "kernel/DepthwiseConvolutionLayer.h"
#include "kernel/AddLayer.h"
#include "kernel/SubLayer.h"
+#include "kernel/MulLayer.h"
#include "kernel/GatherLayer.h"
#include <backend/Backend.h>
void ShapeFixer::visit(const model::operation::Permute &) { /* DO NOTHING */}
void ShapeFixer::visit(const model::operation::Sub &node)
{
- // The same as AddNode
+ // The same as Add
const auto lhs_index{node.getInputs().at(model::operation::Sub::Input::LHS)};
const auto rhs_index{node.getInputs().at(model::operation::Sub::Input::RHS)};
const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
}
}
+void ShapeFixer::visit(const model::operation::Mul &node)
+{
+ // The same as Add
+  const auto lhs_index{node.getInputs().at(model::operation::Mul::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(model::operation::Mul::Input::RHS)};
+
+ // Quantization : not supported
+ if (_ctx.at(lhs_index).typeInfo().type() == model::DataType::QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"ShapeFixer: NYI for quantized Mul"};
+ }
+ // Broadcast
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+ const_cast<::neurun::model::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+ const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+ }
+}
void ShapeFixer::visit(const model::operation::Custom &) { /* DO NOTHING */}
void visit(const model::operation::Add &) override;
void visit(const model::operation::Gather &) override;
void visit(const model::operation::Sub &) override;
+ void visit(const model::operation::Mul &) override;
void visit(const model::operation::Permute &) override;
void visit(const model::operation::Custom &) override;
void visit(const model::operation::Logistic &) override;
#include "AddLayer.h"
-#include <cker/operation/Add.h>
+#include <cker/operation/BinaryArithmeticOps.h>
#include "OperationUtils.h"
{
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::AddParam op_params;
+ nnfw::cker::BinaryArithmeticOpParam op_params;
op_params.float_activation_max = output_activation_max;
op_params.float_activation_min = output_activation_min;
- nnfw::cker::Add(op_params, convertTensorDescriptorToCkerShape(_lhsDescr), _lhsData.f,
- convertTensorDescriptorToCkerShape(_rhsDescr), _rhsData.f,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+ const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
+ return a + b;
+ };
+ nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
+ _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+ _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+ _outputData.f, fn);
}
void AddLayer::addQuant8()
int32_t output_activation_min, output_activation_max;
CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
&output_activation_max);
- // nnfw::cker::AddParam op_params;
+ // nnfw::cker::BinaryArithmeticOpParam op_params;
// op_params.quantized_activation_max = output_activation_max;
// op_params.quantized_activation_min = output_activation_min;
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MulLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void MulLayer::mulFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+
+ const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
+ return a * b;
+ };
+ nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
+ _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+ _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+ _outputData.f, fn);
+}
+
+void MulLayer::mulQuant8()
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+ &output_activation_max);
+ // nnfw::cker::BinaryArithmeticOpParam op_params;
+ // op_params.quantized_activation_max = output_activation_max;
+ // op_params.quantized_activation_min = output_activation_min;
+
+ // cker quant8 mul is not implemented yet
+  throw std::runtime_error{"Mul NYI for quantized"};
+}
+
+void MulLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+ const TensorDescriptor &rhsDescr, const model::Activation activation,
+ uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+ _lhsData.u8 = lhsData;
+ _lhsDescr = lhsDescr;
+ _rhsData.u8 = rhsData;
+ _rhsDescr = rhsDescr;
+ _inputType = lhsDescr.type;
+ _activation = activation;
+ _outputData.u8 = outputData;
+ _outputDescr = outputDescr;
+}
+
+void MulLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ mulFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ mulQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class MulLayer : public ::neurun::exec::IFunction
+{
+public:
+ MulLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
+ {
+ // DO NOTHING
+ }
+
+public:
+ void mulFloat32();
+
+ void mulQuant8();
+
+ void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+ const TensorDescriptor &rhsDescr, const model::Activation activation,
+ uint8_t *outputData, const TensorDescriptor &outputDescr);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ DataPtr _lhsData;
+ DataPtr _rhsData;
+ DataPtr _outputData;
+
+ TensorDescriptor _lhsDescr;
+ TensorDescriptor _rhsDescr;
+ TensorDescriptor _outputDescr;
+
+ model::Activation _activation{model::Activation::NONE};
+
+ OperandType _inputType{OperandType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
#include "SubLayer.h"
-#include <cker/operation/Sub.h>
+#include <cker/operation/BinaryArithmeticOps.h>
#include "OperationUtils.h"
{
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::SubParam op_params;
+ nnfw::cker::BinaryArithmeticOpParam op_params;
op_params.float_activation_max = output_activation_max;
op_params.float_activation_min = output_activation_min;
-
- nnfw::cker::Sub(op_params, convertTensorDescriptorToCkerShape(_lhsDescr), _lhsData.f,
- convertTensorDescriptorToCkerShape(_rhsDescr), _rhsData.f,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+ const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
+ return a - b;
+ };
+ nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
+ _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+ _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+ _outputData.f, fn);
}
void SubLayer::subQuant8()
* limitations under the License.
*/
-#include <cker/operation/Add.h>
+#include <cker/operation/BinaryArithmeticOps.h>
#include "OperationUtil.h"
}
}
-inline void setActivationParams(float min, float max, nnfw::cker::AddParam *params)
+inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
{
params->float_activation_min = min;
params->float_activation_max = max;
}
-inline void setActivationParams(int32_t min, int32_t max, nnfw::cker::AddParam *params)
+inline void setActivationParams(int32_t min, int32_t max,
+ nnfw::cker::BinaryArithmeticOpParam *params)
{
params->quantized_activation_min = min;
params->quantized_activation_max = max;
const auto rhs_buffer = rhs_tensor->bufferRO();
auto out_buffer = out_tensor->buffer();
- nnfw::cker::AddParam cker_param;
+ nnfw::cker::BinaryArithmeticOpParam cker_param;
raw_type activation_min, activation_max;
calculateActivationRange(param.activation, &activation_min, &activation_max);
setActivationParams(activation_min, activation_max, &cker_param);
raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
// Calculate
- nnfw::cker::Add(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape, out_ptr);
+ const std::function<raw_type(const raw_type &, const raw_type &)> fn =
+ [](const raw_type &a, const raw_type &b) { return a + b; };
+ nnfw::cker::BinaryArithmeticOp(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape,
+ out_ptr, fn);
}
void invokeAdd(const ExecEnv *env, const model::Operation &node)
GeneratedTests.lsh_projection*
GeneratedTests.lstm*
GeneratedTests.mobilenet*
-GeneratedTests.mul*
+GeneratedTests.mul_broadcast*
+GeneratedTests.mul_quant*
GeneratedTests.neg*
GeneratedTests.notequal*
GeneratedTests.prelu_ex*
GeneratedTests.lsh_projection*
GeneratedTests.lstm*
GeneratedTests.mobilenet*
-GeneratedTests.mul*
+GeneratedTests.mul_broadcast*
+GeneratedTests.mul_quant*
GeneratedTests.neg*
GeneratedTests.notequal*
GeneratedTests.prelu_ex*