`BiasAdd` and `Scale` are restricted versions of the equivalent Elementwise ops (`Add` and `Mul`) and are going to be removed.
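
With broadcasting, `BiasAddOp(x, b)` is equivalent to `AddOp(x, b)` and
`ScaleOp(x, s)` is equivalent to `MulOp(x, s)`, where `b` and `s` are rank-1
constants broadcast over the channel dimension. The test updates below
illustrate the mechanical replacement:

    // Before: dedicated restricted op
    auto bias1 = g.create<ops::BiasAddOp>("bias1", conv->getOutput(0),
                                          bias1_const->getOutput(0));
    // After: generic elementwise op with the same semantics
    auto bias1 = g.create<ops::AddOp>("bias1", conv->getOutput(0),
                                      bias1_const->getOutput(0));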
Signed-off-by: Sergei Barannikov <s.barannikov@samsung.com>
_passManager.registerPass(std::unique_ptr<Pass>(new CombineTransposes()));
_passManager.registerPass(std::unique_ptr<Pass>(new SinkTranspose()));
_passManager.registerPass(std::unique_ptr<Pass>(new SinkRelu()));
+#if 0
+ // TODO Support broadcasting.
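+  // FuseArithmeticOps was written against BiasAdd/Scale, whose rank-1 weights
+  // are implicitly broadcast; re-enable it once the generic elementwise
+  // Add/Mul ops support broadcasting.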
_passManager.registerPass(std::unique_ptr<Pass>(new FuseArithmeticOps()));
+#endif
_passManager.registerPass(std::unique_ptr<Pass>(new RemoveDeadEnds()));
}
} // registerOptimizationPass
mir::TensorVariant getResult(const mir::Operation::Output* tensor);
void visit(mir::ops::BatchNormOp& op) override;
- void visit(mir::ops::BiasAddOp& op) override;
void visit(mir::ops::CappedReluOp& op) override;
void visit(mir::ops::ConcatOp& op) override;
void visit(mir::ops::ConstantOp& op) override;
void visit(mir::ops::ReluOp& op) override;
void visit(mir::ops::ReshapeOp& op) override;
void visit(mir::ops::ResizeOp& op) override;
- void visit(mir::ops::ScaleOp& op) override;
void visit(mir::ops::SigmoidOp& op) override;
void visit(mir::ops::SliceOp& op) override;
void visit(mir::ops::SoftmaxOp& op) override;
#include "mir/Operation.h"
#include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
#include "mir/ops/CappedReluOp.h"
#include "mir/ops/ConcatOp.h"
#include "mir/ops/ConstantOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
#include "mir/ops/SigmoidOp.h"
#include "mir/ops/SoftmaxOp.h"
#include "mir/ops/SqrtOp.h"
genActivation(op, "LU_BOUNDED_RELU", op.getCap());
}
-void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
- assert(op.getNumInputs() == 2);
- const auto* ir_input = op.getInput(0)->getProducer();
- const auto* ir_weights = op.getInput(1)->getProducer();
- const auto* ir_output = op.getOutput(0);
-
- auto ir_weights_op = dynamic_cast<const ops::ConstantOp*>(ir_weights->getNode());
- if (ir_weights_op == nullptr)
- throw AclCppException("Unsupported operation type");
-
- const auto& ir_weights_tensor = ir_weights_op->getValue();
- assert(ir_weights_tensor.getShape().rank() == 1);
-
- // Get the input node tensor id in the DOM.
- shared_ptr<ArtifactId> input = AF::id(tensorName(ir_input));
-
- const string output_tensor_name = tensorName(ir_output);
-
- shared_ptr<ArtifactId> transposed_input;
- Shape transposed_output_shape;
- shared_ptr<ArtifactId> transposed_output;
-
- // Create the output tensor in the DOM and obtain its identifier.
- const Shape& out_shape = ir_output->getShape();
- const string transposed_output_name = output_tensor_name + "_transposed_output";
-
- switch (out_shape.rank()) {
- case 4: {
- // transpose input to NCHW format supported by ACL
- const string transposed_input_name = output_tensor_name + "_transposed_input";
- transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
- transposed_input = genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), input);
-
- transposed_output =
- genTensor(transposed_output_name, transposed_output_shape);
- break;
- }
- case 2:
- case 1:
- transposed_output_shape = out_shape;
- transposed_input = input;
- transposed_output = genTensor(tensorName(ir_output), out_shape);
- break;
- default:
- throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
- }
-
- // Prefix used for the name of variables related to the operation implementation.
- string layer_name = transposed_output->name() + "_bias_add_layer";
-
- // Reshape the IR biases tensor and generate the corresponding DOM tensor.
- const auto& ir_input_shape = ir_input->getShape();
- Shape ir_biases_shape(ir_input_shape.rank());
-
- // ACL CLArithmeticAddition supports input tensors broadcasting.
- for (int i = 0; i < ir_input_shape.rank(); ++i)
- ir_biases_shape.dim(i) = 1;
-
- ir_biases_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
- auto biases = genTensor(layer_name + "_biases", ir_biases_shape);
-
- // Instantiate the CLArithmeticAddition object.
- auto layer = genLayer("arm_compute::CLArithmeticAddition", layer_name,
- {AF::ref(transposed_input), AF::ref(biases), AF::ref(transposed_output),
- AF::lit("arm_compute::ConvertPolicy::WRAP")});
-
- addToPersistentTensors(biases);
- // Save the IR biases tensor to later read this in the artifact.
- serializeTensor(biases, ir_weights_tensor);
- genTensorAllocation(_infBlock, transposed_output);
- genLayerExecution(layer);
-
- if (out_shape.rank() == 4) {
- // Generate output in NHWC format
- shared_ptr<ArtifactId> output =
- genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
-
- genTensorDeallocation(_infBlock, transposed_input);
- genTensorDeallocation(_infBlock, transposed_output);
- }
-}
-
void AclCppOpGenerator::visit(ops::InputOp& op) {
shared_ptr<ArtifactId> tensor;
tensor = genTensor(op.getOutput(0));
genLayerExecution(layer);
}
-void AclCppOpGenerator::visit(ops::ScaleOp& op) {
- // May be not a perfect implementation, using the CLPixelWiseMultiplication ACL function taking
- // two input tensors with the same shapes.
- assert(op.getNumInputs() == 2);
- const auto* ir_input = op.getInput(0)->getProducer();
- const auto* ir_weights = op.getInput(1)->getProducer();
- const auto* ir_output = op.getOutput(0);
-
- auto ir_weights_op = dynamic_cast<const ops::ConstantOp*>(ir_weights->getNode());
- if (ir_weights_op == nullptr)
- throw AclCppException("Unsupported operation type");
-
- const auto& ir_weights_tensor = ir_weights_op->getValue();
- assert(ir_weights_tensor.getShape().rank() == 1);
-
- // Get input tensor identifier in the generated artifact.
- auto input = AF::id(tensorName(ir_input));
-
- const string output_tensor_name = tensorName(ir_output);
-
- // transpose input to NCHW format supported by ACL
- const string transposed_input_name = output_tensor_name + "_transposed_input";
- shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), input);
-
- // Create the output tensor in the DOM and obtain its identifier.
- const Shape& out_shape = ir_output->getShape();
- Shape transposed_output_shape;
- switch (out_shape.rank()) {
- case 4:
- transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
- break;
- case 2:
- case 1:
- transposed_output_shape = out_shape;
- break;
- default:
- throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
- }
-
- const string transposed_output_name = output_tensor_name + "_transposed_output";
- shared_ptr<ArtifactId> transposed_output =
- genTensor(transposed_output_name, transposed_output_shape);
-
- auto operation_name = transposed_output->name() + "_scale_layer";
-
- // Reshape the IR scales tensor and generate the corresponding DOM tensor.
- const Shape ir_input_shape = transposeShape<0, 3, 1, 2>(ir_input->getShape());
- Shape ir_scales_shape(ir_input_shape.rank());
-
- // ACL CLArithmeticDivision supports input tensors broadcasting.
- for (int i = 0; i < ir_input_shape.rank(); ++i)
- ir_scales_shape.dim(i) = 1;
-
- ir_scales_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
- auto scales = genTensor(operation_name + "_scales", ir_scales_shape);
-
- // We do not use the genMultiplication() function here because the input needs broadcasting.
-
- // Create a unit tensor in the DOM.
- auto unit = genTensor(operation_name + "_unit", ir_input_shape);
-
- // Create a tmp tensor in the DOM to store the result of 1 / scale.
-
- auto tmp = genTensor(operation_name + "_tmp", ir_input_shape);
-
- // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
- auto layer1 = genLayer("arm_compute::CLArithmeticDivision",
- operation_name + "_arithmetic_div_layer_1",
- {AF::ref(unit), AF::ref(scales), AF::ref(tmp)});
- genLayerExecution(layer1);
-
- // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
- auto layer2 = genLayer("arm_compute::CLArithmeticDivision",
- operation_name + "_arithmetic_div_layer_2",
- {AF::ref(transposed_input), AF::ref(tmp), AF::ref(transposed_output)});
-
- addToPersistentTensors(scales);
- // Save the IR scales tensor to later read this in the artifact.
- serializeTensor(scales, ir_weights_tensor);
- addToPersistentTensors(unit);
- // Fill the unit tensor with the 1 value.
- fillTensor(unit, "1");
- addToPersistentTensors(tmp);
- genTensorAllocation(_infBlock, transposed_output);
- genLayerExecution(layer2);
-
- // Generate output in NHWC format
- shared_ptr<ArtifactId> output =
- genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
-
- genTensorDeallocation(_infBlock, transposed_input);
- genTensorDeallocation(_infBlock, transposed_output);
-}
-
void AclCppOpGenerator::visit(mir::ops::SliceOp& /*op*/) {
throw AclCppException("Unimplemented operation: SliceOp");
}
* @param op
*/
void visit(mir::ops::BatchNormOp& op) override;
- void visit(mir::ops::BiasAddOp& op) override;
void visit(mir::ops::CappedReluOp& op) override;
void visit(mir::ops::ConcatOp& op) override;
void visit(mir::ops::ConstantOp& op) override;
void visit(mir::ops::ReluOp& op) override;
void visit(mir::ops::ReshapeOp& op) override;
void visit(mir::ops::ResizeOp& op) override;
- void visit(mir::ops::ScaleOp& op) override;
void visit(mir::ops::SigmoidOp& op) override;
void visit(mir::ops::SliceOp& op) override;
void visit(mir::ops::SoftmaxOp& op) override;
#include "passes/interpreter/Interpreter.h"
#include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
#include "mir/ops/CappedReluOp.h"
#include "mir/ops/ConcatOp.h"
#include "mir/ops/ConstantOp.h"
#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
#include "mir/ops/SigmoidOp.h"
#include "mir/ops/SliceOp.h"
#include "mir/ops/SoftmaxOp.h"
#include "mir/ops/TransposeOp.h"
#include "ops/BatchNorm.h"
-#include "ops/Bias.h"
#include "ops/Concat.h"
#include "ops/Conv2D.h"
#include "ops/DeConv2D.h"
#include "ops/Pool.h"
#include "ops/Reduce.h"
#include "ops/Reshape.h"
-#include "ops/Scale.h"
#include "ops/Softmax.h"
#include "ops/Transpose.h"
#include "ops/common.h"
#include <cmath>
#include <cassert>
-#include <cfenv>
#include <iostream>
#include <vector>
setOutputTensors(op, std::move(outputs));
}
-void NNInterpreter::visit(ops::BiasAddOp& op) {
- auto inputs = getInputTensors(op);
- auto outputs = BiasAdd(inputs[0], inputs[1])();
- setOutputTensors(op, std::move(outputs));
-}
-
void NNInterpreter::visit(ops::BatchNormOp& op) {
auto inputs = getInputTensors(op);
auto outputs = BatchNorm<float>(inputs[0], op)();
setOutputTensors(op, std::move(outputs));
}
-void NNInterpreter::visit(ops::ScaleOp& op) {
- auto inputs = getInputTensors(op);
- auto outputs = Scale(inputs[0], inputs[1])();
- setOutputTensors(op, std::move(outputs));
-}
-
void NNInterpreter::visit(ops::SliceOp& op) {
auto inputs = getInputTensors(op);
auto input = Tensor<float>(inputs[0]);
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Bias.h"
-#include "Fill.h"
-
-namespace nnc {
-
-BiasAdd::BiasAdd(const mir::TensorVariant& in1, const mir::TensorVariant& in2)
- : _input1(in1), _input2(in2) {
- assert(_input2.getShape().rank() == 1);
- assert(_input1.getShape().dim(-1) == _input2.getShape().dim(0));
-}
-
-std::vector<mir::TensorVariant> BiasAdd::operator()() {
- return Fill<float>(_input1.getShape(), [this](const mir::Index& idx) {
- return _input1.at(idx) + _input2.atOffset(idx.at(idx.rank() - 1));
- })();
-}
-
-} // namespace nnc
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NNC_BACKEND_INTERPRETER_BIAS_
-#define _NNC_BACKEND_INTERPRETER_BIAS_
-
-#include "OperationImpl.h"
-#include "mir/Tensor.h"
-
-namespace nnc {
-
-class BiasAdd : public OperationImpl<float> {
-public:
- BiasAdd(const mir::TensorVariant& in1, const mir::TensorVariant& in2);
-
- std::vector<mir::TensorVariant> operator()() override;
-
-private:
- const mir::Tensor<float> _input1;
- const mir::Tensor<float> _input2;
-};
-
-} // namespace nnc
-
-#endif //_NNC_BACKEND_INTERPRETER_BIAS_
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Scale.h"
-#include "Fill.h"
-
-namespace nnc {
-
-Scale::Scale(const mir::TensorVariant& in1, const mir::TensorVariant& in2)
- : _input1(in1), _input2(in2) {
- assert(_input2.getShape().rank() == 1);
- assert(_input1.getShape().dim(-1) == _input2.getShape().dim(0));
-}
-
-std::vector<mir::TensorVariant> Scale::operator()() {
- return Fill<float>(_input1.getShape(), [this](const mir::Index& idx) {
- return _input1.at(idx) * _input2.atOffset(idx.at(idx.rank() - 1));
- })();
-}
-
-} // namespace nnc
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NNC_CORE_BACKEND_INTERPRETER_SCALE_IMPL_
-#define _NNC_CORE_BACKEND_INTERPRETER_SCALE_IMPL_
-
-#include "OperationImpl.h"
-#include "mir/Tensor.h"
-
-namespace nnc {
-
-class Scale : public OperationImpl<float> {
-public:
- Scale(const mir::TensorVariant& in1, const mir::TensorVariant& in2);
-
- std::vector<mir::TensorVariant> operator()() override;
-
-private:
- const mir::Tensor<float> _input1;
- const mir::Tensor<float> _input2;
-};
-
-} // namespace nnc
-
-#endif // _NNC_CORE_BACKEND_INTERPRETER_SCALE_IMPL_
#include "passes/optimizations/FuseArithmeticOps.h"
#include "passes/optimizations/OptimizationUtils.h"
-#include "mir/ops/BiasAddOp.h"
+#include "mir/ops/AddOp.h"
#include "mir/ops/ConstantOp.h"
#include "mir/ops/Conv2DOp.h"
-#include "mir/ops/ScaleOp.h"
+#include "mir/ops/MulOp.h"
#include "mir/Graph.h"
#include "mir/Tensor.h"
#include "mir/Index.h"
#include "mir/ShapeRange.h"
#include <algorithm>
-#include <functional>
namespace nnc {
using Edge = pair<Operation*, Operation*>;
/**
- * This function used to get 'ConstantOp' with weights of 'BiasAddOp', 'ScaleOp' or 'Conv2DOp'
+ * This function is used to get the 'ConstantOp' holding the weights of an 'AddOp', 'MulOp' or 'Conv2DOp'
* For each of these ops weights stored in second input node
*/
ops::ConstantOp* getSecondInputAsConst(const Operation* op) {
- assert(op->getType() == OpType::biasAdd ||
- op->getType() == OpType::scale ||
+ assert(op->getType() == OpType::add ||
+ op->getType() == OpType::mul ||
op->getType() == OpType::conv2D);
return dynamic_cast<ops::ConstantOp*>(op->getInput(1)->getProducer()->getNode());
}
*/
float operand2 = const2_accessor.at(Index{idx.at(0)});
switch (merge_type) {
- case OpType::scale:
+ case OpType::mul:
new_const_accessor.at(idx) = operand1 * operand2;
break;
- case OpType::biasAdd:
+ case OpType::add:
new_const_accessor.at(idx) = operand1 + operand2;
break;
default:
- assert(false && "only 'scale' and 'biasAdd' constants merge types supported");
+ assert(false && "only 'mul' and 'add' constants merge types supported");
}
}
return g->create<ops::ConstantOp>(const1_op->getName(), new_const_val);
}
-// TODO: support 'DepthwiseConv'->'Scale' and 'DepthwiseConv'->'ElementwiseMul'
-// TODO: support 'ElementwiseAdd' and 'ElementwiseMul'
+// TODO: support 'DepthwiseConv'->'Mul'
/**
* This function fuses some successive operations with constant weights into one:
- * 'Bias'->'Bias' into 'Bias'; 'Scale'->'Scale' into 'Scale'; 'Conv'->'Scale' into 'Conv';
+ * 'Add'->'Add' into 'Add'; 'Mul'->'Mul' into 'Mul'; 'Conv'->'Mul' into 'Conv'.
* Before: | After:
- * -------------------------|--------------------------
- * [input] [Const1] | [input] [Const1*Const2]
- * \\ // | \\ //
- * [Scale] [Const2] | [Scale]
- * \\ // |
- * [Scale] |
- * -------------------------|--------------------------
- * [input] [Const1] | [input] [Const1+Const2]
- * \\ // | \\ //
- * [BiasAdd] [Const2] | [BiasAdd]
- * \\ // |
- * [BiasAdd] |
- * -------------------------|--------------------------
- * [input] [Const1] | [input] [Const1*Const2]
- * \\ // | \\ //
- * [Conv2D] [Const2] | [Conv2D]
- * \\ // |
- * [Scale] |
+ * -------------------------|---------------------------
+ * [input] [Const1] | [input] [Const1*Const2]
+ * \\ // | \\ //
+ * [Mul] [Const2] | [Mul]
+ * \\ // |
+ * [Mul] |
+ * -------------------------|---------------------------
+ * [input] [Const1] | [input] [Const1+Const2]
+ * \\ // | \\ //
+ * [Add] [Const2] | [Add]
+ * \\ // |
+ * [Add] |
+ * -------------------------|---------------------------
+ * [input] [Const1] | [input] [Const1*Const2]
+ * \\ // | \\ //
+ * [Conv2D] [Const2] | [Conv2D]
+ * \\ // |
+ * [Mul] |
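+ *
+ * These rewrites rely on the identities x*c1*c2 == x*(c1*c2),
+ * x+c1+c2 == x+(c1+c2) and conv(x, W)*c == conv(x, W*c) for a
+ * channel-wise constant c.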
*/
bool fuseSuccessiveOps(Graph* g) {
// Find all successive ops
vector<Edge> successive_ops;
- auto scale_scale_vec = findSuccessiveOpsWithConstWeights(g, OpType::scale, OpType::scale);
- successive_ops.insert(successive_ops.end(), scale_scale_vec.begin(), scale_scale_vec.end());
- auto bias_bias_vec = findSuccessiveOpsWithConstWeights(g, OpType::biasAdd, OpType::biasAdd);
- successive_ops.insert(successive_ops.end(), bias_bias_vec.begin(), bias_bias_vec.end());
- auto conv_scale_vec = findSuccessiveOpsWithConstWeights(g, OpType::conv2D, OpType::scale);
- successive_ops.insert(successive_ops.end(), conv_scale_vec.begin(), conv_scale_vec.end());
+ auto mul_mul_vec = findSuccessiveOpsWithConstWeights(g, OpType::mul, OpType::mul);
+ successive_ops.insert(successive_ops.end(), mul_mul_vec.begin(), mul_mul_vec.end());
+ auto add_add_vec = findSuccessiveOpsWithConstWeights(g, OpType::add, OpType::add);
+ successive_ops.insert(successive_ops.end(), add_add_vec.begin(), add_add_vec.end());
+ auto conv_mul_vec = findSuccessiveOpsWithConstWeights(g, OpType::conv2D, OpType::mul);
+ successive_ops.insert(successive_ops.end(), conv_mul_vec.begin(), conv_mul_vec.end());
for (auto& edge : successive_ops) {
auto const1_op = getSecondInputAsConst(edge.first);
}
/**
- * This function sinks 'BiasAdd' through 'Scale'
- * by multiplying 'BiasAdd' weights on 'Scale' weights
+ * This function sinks 'Add' through 'Mul'
+ * by multiplying the 'Add' weights by the 'Mul' weights
* Before: | After:
*--------------------------|--------------------------
- * [input] [Const1] | [input] [Const2]
- * \\ // | \\ //
- * [BiasAdd] [Const2] | [Scale] [Const1*Const2]
- * \\ // | \\ //
- * [Scale] | [BiasAdd]
+ * [input] [Const1] | [input] [Const2]
+ * \\ // | \\ //
+ * [Add] [Const2] | [Mul] [Const1*Const2]
+ * \\ // | \\ //
+ * [Mul] | [Add]
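+ * I.e. (input + Const1) * Const2 == input * Const2 + Const1 * Const2.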
* |
*/
-bool sinkBiasThroughScale(Graph* g) {
- auto bias_scale_edges = findSuccessiveOpsWithConstWeights(g, OpType::biasAdd, OpType::scale);
+bool sinkAddThroughMul(Graph* g) {
+ auto add_mul_edges = findSuccessiveOpsWithConstWeights(g, OpType::add, OpType::mul);
- for (auto& edge : bias_scale_edges) {
- auto old_bias_op = edge.first;
- auto old_scale_op = edge.second;
- auto old_bias_const_op = getSecondInputAsConst(old_bias_op);
- auto old_scale_const_op = getSecondInputAsConst(old_scale_op);
- assert(old_bias_const_op && old_scale_const_op);
+ for (auto& edge : add_mul_edges) {
+ auto old_add_op = edge.first;
+ auto old_mul_op = edge.second;
+ auto old_add_const_op = getSecondInputAsConst(old_add_op);
+    auto old_mul_const_op = getSecondInputAsConst(old_mul_op);
+    assert(old_add_const_op && old_mul_const_op);
// Create new operations
- auto old_bias_input = old_bias_op->getInput(0)->getProducer();
- auto new_scale_op = g->copyOpWithInputs(old_scale_op, {old_bias_input,
- old_scale_const_op->getOutput(0)});
- auto new_bias_const_op = mergeConstantOps(g, old_bias_const_op, old_scale_const_op,
- OpType::scale);
- auto new_bias_op = g->copyOpWithInputs(old_bias_op, {new_scale_op->getOutput(0),
- new_bias_const_op->getOutput(0)});
-
- // Replace old scale with new bias and remove old nodes
- g->replaceNode(old_scale_op, new_bias_op);
- removeNodeIfUnused(g, old_bias_op);
- removeNodeIfUnused(g, old_bias_const_op);
+ auto old_add_input = old_add_op->getInput(0)->getProducer();
+    auto new_mul_op = g->copyOpWithInputs(old_mul_op, {old_add_input,
+                                                       old_mul_const_op->getOutput(0)});
+    auto new_add_const_op = mergeConstantOps(g, old_add_const_op, old_mul_const_op,
+                                             OpType::mul);
+    auto new_add_op = g->copyOpWithInputs(old_add_op, {new_mul_op->getOutput(0),
+                                                       new_add_const_op->getOutput(0)});
+
+ // Replace old mul with new add and remove old nodes
+ g->replaceNode(old_mul_op, new_add_op);
+ removeNodeIfUnused(g, old_add_op);
+ removeNodeIfUnused(g, old_add_const_op);
}
- // If there is no bias-scale edges - graph wasn't changed
- return !bias_scale_edges.empty();
+  // If there are no add-mul edges, the graph wasn't changed
+ return !add_mul_edges.empty();
}
} // unnamed namespace
while (graph_changed) {
graph_changed = false;
graph_changed |= fuseSuccessiveOps(g);
- graph_changed |= sinkBiasThroughScale(g);
+ graph_changed |= sinkAddThroughMul(g);
}
return g;
#include "CommonData.generated.h"
#include "eigen.generated.h"
#include "cpp_common_funcs.generated.h"
-#include "cpp_add_bias.generated.h"
#include "cpp_capped_relu.generated.h"
#include "cpp_concat.generated.h"
#include "cpp_conv.generated.h"
#include "cpp_reduce.generated.h"
#include "cpp_resize.generated.h"
#include "cpp_softmax.generated.h"
-#include "cpp_scale.generated.h"
#include "cpp_slice.generated.h"
#include "cpp_dropout.generated.h"
#include "cpp_batchnorm.generated.h"
out.write(CommonData, sizeof(CommonData));
out.write(cpp_common_funcs, sizeof(cpp_common_funcs));
- out.write(cpp_add_bias, sizeof(cpp_add_bias));
out.write(cpp_capped_relu, sizeof(cpp_capped_relu));
out.write(cpp_concat, sizeof(cpp_concat));
out.write(cpp_conv, sizeof(cpp_conv));
// Operations calls into all of the above
out.write(cpp_operations, sizeof(cpp_operations));
// Below call into operations
- out.write(cpp_scale, sizeof(cpp_scale));
out.write(cpp_dropout, sizeof(cpp_dropout));
out.write(cpp_batchnorm, sizeof(cpp_batchnorm));
out.write(cpp_leaky_relu, sizeof(cpp_leaky_relu));
#include "ModelAnalyzer.h"
#include "mir/Shape.h"
-#include "mir/ShapeRange.h"
#include "mir/Graph.h"
#include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
#include "mir/ops/CappedReluOp.h"
#include "mir/ops/ConcatOp.h"
#include "mir/ops/ConstantOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
#include "mir/ops/SigmoidOp.h"
#include "mir/ops/SliceOp.h"
#include "mir/ops/SoftmaxOp.h"
appendOperationToInference(&op, "cappedRelu");
}
-void ModelAnalyzer::visit(ops::BiasAddOp& op) {
- appendOperationToInference(&op, "biasAdd");
-}
-
void ModelAnalyzer::visit(ops::InputOp& op) {
assert(op.getNumInputs() == 0);
appendOperationToInference(&op, "in");
appendOperationToInference(&op, "dropout");
}
-void ModelAnalyzer::visit(ops::ScaleOp& op) {
- appendOperationToInference(&op, "scale");
-}
-
void ModelAnalyzer::visit(mir::ops::SliceOp& op) {
appendOperationToInference(&op, "slice");
}
void analyze(const mir::Graph* g);
void visit(mir::ops::BatchNormOp& op) override;
- void visit(mir::ops::BiasAddOp& op) override;
void visit(mir::ops::CappedReluOp& op) override;
void visit(mir::ops::ConcatOp& op) override;
void visit(mir::ops::ConstantOp& op) override;
void visit(mir::ops::ReluOp& op) override;
void visit(mir::ops::ReshapeOp& op) override;
void visit(mir::ops::ResizeOp& op) override;
- void visit(mir::ops::ScaleOp& op) override;
void visit(mir::ops::SigmoidOp& op) override;
void visit(mir::ops::SliceOp& op) override;
void visit(mir::ops::SoftmaxOp& op) override;
#include "CommonData.def"
#include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
#include "mir/ops/CappedReluOp.h"
#include "mir/ops/ConcatOp.h"
#include "mir/ops/ConstantOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
#include "mir/ops/SliceOp.h"
#include "mir/ops/SoftmaxOp.h"
#include "mir/ops/SqueezeOp.h"
serializeT<float>(op.getCap());
}
-void Serializer::visit(ops::BiasAddOp& /*op*/) {
- _curOp->paramStartOffset = _buffer.size();
- // no parameters to dump
-}
-
void Serializer::visit(ops::InputOp& /*op*/) {
// no parameters to dump
}
serializeT<int32_t>(op.getSpatial());
}
-void Serializer::visit(ops::ScaleOp& /*op*/) {
- _curOp->paramStartOffset = _buffer.size();
- // no parameters to dump
-}
-
void Serializer::visit(mir::ops::SliceOp& op) {
_curOp->paramStartOffset = _buffer.size();
serializeShape(op.getStarts());
public:
void visit(mir::ops::BatchNormOp& op) override;
- void visit(mir::ops::BiasAddOp& op) override;
void visit(mir::ops::CappedReluOp& op) override;
void visit(mir::ops::ConcatOp& op) override;
void visit(mir::ops::ConstantOp& op) override;
void visit(mir::ops::ReluOp& op) override;
void visit(mir::ops::ReshapeOp& op) override;
void visit(mir::ops::ResizeOp& op) override;
- void visit(mir::ops::ScaleOp& op) override;
void visit(mir::ops::SigmoidOp& op) override;
void visit(mir::ops::SliceOp& op) override;
void visit(mir::ops::SoftmaxOp& op) override;
+++ /dev/null
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-inline void AddBiasAndEvalActivationFunction(const float* bias_data,
- const RuntimeShape& bias_shape,
- float* array_data,
- const RuntimeShape& array_shape) {
-#ifdef USE_NEON
- const int bias_size = bias_shape.FlatSize();
- const int array_size = array_shape.FlatSize();
- TFLITE_DCHECK_EQ((array_size % bias_size), 0);
- float* array_ptr = array_data;
- float* array_end_ptr = array_ptr + array_size;
- for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
- int i = 0;
- for (; i <= bias_size - 16; i += 16) {
- auto b0 = vld1q_f32(bias_data + i);
- auto b1 = vld1q_f32(bias_data + i + 4);
- auto b2 = vld1q_f32(bias_data + i + 8);
- auto b3 = vld1q_f32(bias_data + i + 12);
- auto a0 = vld1q_f32(array_ptr + i);
- auto a1 = vld1q_f32(array_ptr + i + 4);
- auto a2 = vld1q_f32(array_ptr + i + 8);
- auto a3 = vld1q_f32(array_ptr + i + 12);
- auto x0 = vaddq_f32(a0, b0);
- auto x1 = vaddq_f32(a1, b1);
- auto x2 = vaddq_f32(a2, b2);
- auto x3 = vaddq_f32(a3, b3);
- vst1q_f32(array_ptr + i, x0);
- vst1q_f32(array_ptr + i + 4, x1);
- vst1q_f32(array_ptr + i + 8, x2);
- vst1q_f32(array_ptr + i + 12, x3);
- }
- for (; i <= bias_size - 4; i += 4) {
- auto b = vld1q_f32(bias_data + i);
- auto a = vld1q_f32(array_ptr + i);
- auto x = vaddq_f32(a, b);
- vst1q_f32(array_ptr + i, x);
- }
- for (; i < bias_size; i++) {
- array_ptr[i] = array_ptr[i] + bias_data[i];
- }
- }
-#else // not NEON
-
- assert(bias_shape.DimensionsCount() == 1 &&
- bias_shape.Dims(0) == array_shape.Dims(array_shape.DimensionsCount() - 1));
- const auto bias_vec = MapAsVector(bias_data, bias_shape);
- auto out_mat = MapAsMatrixWithLastDimAsRows(array_data, array_shape);
- out_mat.colwise() += bias_vec;
-#endif
-}
CappedRelu(input, input_d, cap, out.getData(), input_d);
}
-void biasAdd(Tensor& out, const char* params, const Tensor& in1, const Tensor& in2)
-{
- out.reshape(in1.getShape());
- out.fillData(in1.getData(), in1.getShape().getNumElems());
-
- AddBiasAndEvalActivationFunction(in2.getData(), shapeToRuntimeShape(in2.getShape()),
- out.getData(), shapeToRuntimeShape(out.getShape()));
-}
-
void slice(Tensor& out, const char* params, const Tensor& in) {
Shape starts = deserializeShape(params);
Shape sizes = deserializeShape(params);
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-void scale(Tensor& out, const char* params, const Tensor& in1, const Tensor& in2) {
- const auto in1_shape = shapeToRuntimeShape(in1.getShape());
- const auto in2_shape = shapeToRuntimeShape(in2.getShape());
- assert(in2_shape.DimensionsCount() == 1 &&
- in2_shape.Dims(0) == in1_shape.Dims(in1_shape.DimensionsCount() - 1));
-
- out.reshape(in1.getShape());
-
-#ifdef USE_NEON
- const int scale_size = in2_shape.FlatSize();
- const int array_size = in1_shape.FlatSize();
- TFLITE_DCHECK_EQ((array_size % scale_size), 0);
- out.fillData(in1.getData(), array_size);
- float* array_ptr = out.getData();
- const float* scale_ptr = in2.getData();
- float* array_end_ptr = array_ptr + array_size;
- for (; array_ptr != array_end_ptr; array_ptr += scale_size) {
- int i = 0;
- for (; i <= scale_size - 16; i += 16) {
- auto b0 = vld1q_f32(scale_ptr + i);
- auto b1 = vld1q_f32(scale_ptr + i + 4);
- auto b2 = vld1q_f32(scale_ptr + i + 8);
- auto b3 = vld1q_f32(scale_ptr + i + 12);
- auto a0 = vld1q_f32(array_ptr + i);
- auto a1 = vld1q_f32(array_ptr + i + 4);
- auto a2 = vld1q_f32(array_ptr + i + 8);
- auto a3 = vld1q_f32(array_ptr + i + 12);
- auto x0 = vmulq_f32(a0, b0);
- auto x1 = vmulq_f32(a1, b1);
- auto x2 = vmulq_f32(a2, b2);
- auto x3 = vmulq_f32(a3, b3);
- vst1q_f32(array_ptr + i, x0);
- vst1q_f32(array_ptr + i + 4, x1);
- vst1q_f32(array_ptr + i + 8, x2);
- vst1q_f32(array_ptr + i + 12, x3);
- }
- for (; i <= scale_size - 4; i += 4) {
- auto b = vld1q_f32(scale_ptr + i);
- auto a = vld1q_f32(array_ptr + i);
- auto x = vmulq_f32(a, b);
- vst1q_f32(array_ptr + i, x);
- }
- for (; i < scale_size; i++) {
- array_ptr[i] = array_ptr[i] * scale_ptr[i];
- }
- }
-#else // not NEON
-
- const auto out_shape = shapeToRuntimeShape(out.getShape());
-
- const auto in1_mat = MapAsMatrixWithLastDimAsRows(in1.getData(), in1_shape);
- const auto in2_vec = MapAsVector(in2.getData(), in2_shape);
- auto out_mat = MapAsMatrixWithLastDimAsRows(out.getData(), out_shape);
- out_mat.colwise() = in2_vec;
- out_mat.array() = out_mat.array() * in1_mat.array();
-#endif
-}
// MIR
#include "mir/Graph.h"
-#include "mir/ops/BiasAddOp.h"
-#include "mir/ops/CappedReluOp.h"
#include "mir/ops/CappedReluOp.h"
#include "mir/ops/ConcatOp.h"
#include "mir/ops/ConstantOp.h"
#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
-#include "mir/ops/ScaleOp.h"
#include "mir/ops/SigmoidOp.h"
#include "mir/ops/SoftmaxOp.h"
#include "mir/ops/TanhOp.h"
checkDomStructure(m, {}, {});
}
-TEST(acl_backend_mir_to_dom, bias) {
- const int32_t channels = 2;
- TensorVariant w = createTensorVariant({channels});
-
- Graph g;
- OpConstructor op_generator = [&w](Graph& g, const vector<Operation::Output*>& inputs) {
- auto bias = g.create<mir::ops::ConstantOp>("", w)->getOutput(0);
- return g.create<mir::ops::BiasAddOp>("bias", inputs[0], bias);
- };
- vector<Shape> input_shapes{{1, 10, 10, channels}};
-
- fillGraph(g, op_generator, input_shapes);
-
- stringstream params_out;
- AclCppOpGenerator dom_gen(artifactName, params_out);
-
- const ArtifactModule& m = dom_gen.generate(&g);
-
- checkDomStructure(m, {}, {});
-}
-
-TEST(acl_backend_mir_to_dom, scale) {
- const int32_t channels = 2;
- TensorVariant w = createTensorVariant({channels});
-
- Graph g;
- OpConstructor op_generator = [&w](Graph& g, const vector<Operation::Output*>& inputs) {
- auto scale = g.create<mir::ops::ConstantOp>("", w)->getOutput(0);
- return g.create<mir::ops::ScaleOp>("scale", inputs[0], scale);
- };
- vector<Shape> input_shapes{{1, 10, 10, channels}};
-
- fillGraph(g, op_generator, input_shapes);
-
- stringstream params_out;
- AclCppOpGenerator dom_gen(artifactName, params_out);
-
- const ArtifactModule& m = dom_gen.generate(&g);
-
- checkDomStructure(m, {}, {});
-
- stringstream code_out;
- ArtifactGeneratorCppCode code_gen(code_out);
-}
-
TEST(acl_backend_mir_to_dom, concat) {
Graph g;
OpConstructor op_generator = [](Graph& g, const vector<Operation::Output*>& inputs) {
#include "passes/optimizations/FuseArithmeticOps.h"
#include "Util.h"
#include "mir/Graph.h"
-#include "mir/ops/BiasAddOp.h"
+#include "mir/ops/AddOp.h"
#include "mir/ops/ConstantOp.h"
#include "mir/ops/Conv2DOp.h"
-#include "mir/ops/ScaleOp.h"
+#include "mir/ops/MulOp.h"
#include <gtest/gtest.h>
#include <sstream>
auto conv = g.create<ops::Conv2DOp>("conv", input->getOutput(0), conv_const->getOutput(0),
Shape{1, 1}, padding, padding);
auto bias1_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
- auto bias1 = g.create<ops::BiasAddOp>("bias1", conv->getOutput(0), bias1_const->getOutput(0));
+ auto bias1 = g.create<ops::AddOp>("bias1", conv->getOutput(0), bias1_const->getOutput(0));
auto scale1_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
- auto scale1 = g.create<ops::ScaleOp>("scale1", bias1->getOutput(0), scale1_const->getOutput(0));
+ auto scale1 = g.create<ops::MulOp>("scale1", bias1->getOutput(0), scale1_const->getOutput(0));
auto scale2_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
- auto scale2 = g.create<ops::ScaleOp>("scale2", scale1->getOutput(0), scale2_const->getOutput(0));
+ auto scale2 = g.create<ops::MulOp>("scale2", scale1->getOutput(0), scale2_const->getOutput(0));
auto scale3_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
- auto scale3 = g.create<ops::ScaleOp>("scale3", scale2->getOutput(0), scale3_const->getOutput(0));
+ auto scale3 = g.create<ops::MulOp>("scale3", scale2->getOutput(0), scale3_const->getOutput(0));
auto bias2_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
- g.create<ops::BiasAddOp>("", scale3->getOutput(0), bias2_const->getOutput(0));
+ g.create<ops::AddOp>("", scale3->getOutput(0), bias2_const->getOutput(0));
// Check that layout is desired
std::stringstream ss;
#include "mir/ops/ConcatOp.h"
#include "mir/ops/OutputOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/BiasAddOp.h"
-#include "mir/ops/ScaleOp.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/MulOp.h"
#include "mir/ops/Conv2DOp.h"
#include "mir/Visitor.h"
void visit(mir::ops::TanhOp& op) override { _s << "th_" << op.getName() << "."; }
- void visit(mir::ops::ScaleOp& op) override { _s << "s_" << op.getName() << "."; }
+ void visit(mir::ops::MulOp& op) override { _s << "s_" << op.getName() << "."; }
- void visit(mir::ops::BiasAddOp& op) override { _s << "b_" << op.getName() << "."; }
+ void visit(mir::ops::AddOp& op) override { _s << "b_" << op.getName() << "."; }
void visit(mir::ops::ReluOp& op) override { _s << "r_" << op.getName() << "."; }
#include "code_snippets/cpp_header_types.def"
#include "code_snippets/cpp_common_funcs.def"
-#include "code_snippets/cpp_add_bias.def"
#include "code_snippets/cpp_capped_relu.def"
#include "code_snippets/cpp_concat.def"
#include "code_snippets/cpp_conv.def"
#include "code_snippets/cpp_transpose.def"
#include "code_snippets/cpp_operations.def"
-#include "code_snippets/cpp_scale.def"
#include "code_snippets/cpp_leaky_relu.def"
// soft backend part
#include "SBSerializer.h"
// operations part
-#include "mir/ops/BiasAddOp.h"
#include "mir/ops/CappedReluOp.h"
#include "mir/ops/ConcatOp.h"
#include "mir/ops/Conv2DOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
#include "mir/ops/SigmoidOp.h"
#include "mir/ops/SliceOp.h"
#include "mir/ops/SoftmaxOp.h"
}
-TEST(cpp_operations_test, bias) {
- vector<int> input_shape_data{2, 3, 4, 5};
- vector<int> weights_shape_data{5};
- vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
- Tensor input_atensor0;
- Tensor input_atensor1;
- fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
- fillTensors(input_ntensors[1], input_atensor1, weights_shape_data, 1.0f);
-
- auto op_generator = [](mir::Graph& g, const std::vector<mir::Operation::Output*>& inputs) {
- return g.create<mir::ops::BiasAddOp>("y", inputs[0], inputs[1]);
- };
-
- createAndRunTestGraph(op_generator, biasAdd, input_ntensors, input_atensor0, input_atensor1);
-}
-
-TEST(cpp_operations_test, scale) {
- vector<int> input_shape_data{2, 3, 4, 5};
- vector<int> weights_shape_data{5};
- vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
- Tensor input_atensor0;
- Tensor input_atensor1;
- fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
- fillTensors(input_ntensors[1], input_atensor1, weights_shape_data, 1.0f);
-
- auto op_generator = [](mir::Graph& g, const std::vector<mir::Operation::Output*>& inputs) {
- return g.create<mir::ops::ScaleOp>("y", inputs[0], inputs[1]);
- };
-
- createAndRunTestGraph(op_generator, scale, input_ntensors, input_atensor0, input_atensor1);
-}
-
TEST(cpp_operations_test, capped_relu) {
// test prerequisites
// cap has this value to cut input numbers(they are in range [-1, 1])