From 5b0745857e79c3f4567a28c0b10d2f71e0e92ca2 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=D0=A1=D0=B5=D1=80=D0=B3=D0=B5=D0=B9=20=D0=91=D0=B0=D1=80?=
 =?utf8?q?=D0=B0=D0=BD=D0=BD=D0=B8=D0=BA=D0=BE=D0=B2/AI=20Tools=20Lab=20/S?=
 =?utf8?q?RR/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?=
Date: Wed, 7 Aug 2019 18:08:48 +0300
Subject: [PATCH] [nnc] Replace BiasAdd and Scale with elementwise equivalents
 (#6358)

`BiasAdd` and `Scale` are restricted versions of equivalent Elementwise ops
and are going to be removed.

Signed-off-by: Sergei Barannikov
---
 compiler/nnc/driver/Driver.cpp                     |   3 +
 .../nnc/include/passes/interpreter/Interpreter.h   |   2 -
 .../passes/acl_soft_backend/AclCppOpGenerator.cpp  | 179 ---------------------
 .../passes/acl_soft_backend/AclCppOpGenerator.h    |   2 -
 compiler/nnc/passes/interpreter/Interpreter.cpp    |  17 --
 compiler/nnc/passes/interpreter/ops/Bias.cpp       |  34 ----
 compiler/nnc/passes/interpreter/ops/Bias.h         |  38 -----
 compiler/nnc/passes/interpreter/ops/Scale.cpp      |  34 ----
 compiler/nnc/passes/interpreter/ops/Scale.h        |  38 -----
 .../nnc/passes/optimizations/FuseArithmeticOps.cpp | 129 ++++++++-------
 compiler/nnc/passes/soft_backend/CPPGenerator.cpp  |   4 -
 compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp |  11 --
 compiler/nnc/passes/soft_backend/ModelAnalyzer.h   |   2 -
 compiler/nnc/passes/soft_backend/SBSerializer.cpp  |  12 --
 compiler/nnc/passes/soft_backend/SBSerializer.h    |   2 -
 .../soft_backend/code_snippets/cpp_add_bias.def    |  64 --------
 .../soft_backend/code_snippets/cpp_operations.def  |   9 --
 .../soft_backend/code_snippets/cpp_scale.def       |  73 ---------
 compiler/nnc/unittests/acl_backend/MIRToDOM.cpp    |  48 ------
 .../unittests/optimizations/FuseArithmeticOps.cpp  |  14 +-
 compiler/nnc/unittests/optimizations/Util.h        |   8 +-
 .../nnc/unittests/soft_backend/CPPOperations.cpp   |  36 -----
 22 files changed, 77 insertions(+), 682 deletions(-)
 delete mode 100644 compiler/nnc/passes/interpreter/ops/Bias.cpp
 delete mode 100644 compiler/nnc/passes/interpreter/ops/Bias.h
 delete mode 100644 compiler/nnc/passes/interpreter/ops/Scale.cpp
 delete mode 100644 compiler/nnc/passes/interpreter/ops/Scale.h
 delete mode 100644 compiler/nnc/passes/soft_backend/code_snippets/cpp_add_bias.def
 delete mode 100644 compiler/nnc/passes/soft_backend/code_snippets/cpp_scale.def

diff --git a/compiler/nnc/driver/Driver.cpp b/compiler/nnc/driver/Driver.cpp
index e060952..62125cb 100644
--- a/compiler/nnc/driver/Driver.cpp
+++ b/compiler/nnc/driver/Driver.cpp
@@ -126,7 +126,10 @@ void Driver::registerOptimizationPass() {
   _passManager.registerPass(std::unique_ptr<Pass>(new CombineTransposes()));
   _passManager.registerPass(std::unique_ptr<Pass>(new SinkTranspose()));
   _passManager.registerPass(std::unique_ptr<Pass>(new SinkRelu()));
+#if 0
+  // TODO Support broadcasting.
   _passManager.registerPass(std::unique_ptr<Pass>(new FuseArithmeticOps()));
+#endif
   _passManager.registerPass(std::unique_ptr<Pass>(new RemoveDeadEnds()));
 }
 } // registerOptimizationPass
diff --git a/compiler/nnc/include/passes/interpreter/Interpreter.h b/compiler/nnc/include/passes/interpreter/Interpreter.h
index c8bed84..f50cbb1 100644
--- a/compiler/nnc/include/passes/interpreter/Interpreter.h
+++ b/compiler/nnc/include/passes/interpreter/Interpreter.h
@@ -37,7 +37,6 @@ public:
   mir::TensorVariant getResult(const mir::Operation::Output* tensor);

   void visit(mir::ops::BatchNormOp& op) override;
-  void visit(mir::ops::BiasAddOp& op) override;
   void visit(mir::ops::CappedReluOp& op) override;
   void visit(mir::ops::ConcatOp& op) override;
   void visit(mir::ops::ConstantOp& op) override;
@@ -59,7 +58,6 @@ public:
   void visit(mir::ops::ReluOp& op) override;
   void visit(mir::ops::ReshapeOp& op) override;
   void visit(mir::ops::ResizeOp& op) override;
-  void visit(mir::ops::ScaleOp& op) override;
   void visit(mir::ops::SigmoidOp& op) override;
   void visit(mir::ops::SliceOp& op) override;
   void visit(mir::ops::SoftmaxOp& op) override;
diff --git a/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
index 7c1e5f9..8900b49 100644
--- a/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
+++ b/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
@@ -23,7 +23,6 @@
 #include "mir/Operation.h"

 #include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -43,7 +42,6 @@
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
 #include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SoftmaxOp.h"
 #include "mir/ops/SqrtOp.h"
@@ -371,88 +369,6 @@ void AclCppOpGenerator::visit(ops::CappedReluOp& op) {
   genActivation(op, "LU_BOUNDED_RELU", op.getCap());
 }

-void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
-  assert(op.getNumInputs() == 2);
-  const auto* ir_input = op.getInput(0)->getProducer();
-  const auto* ir_weights = op.getInput(1)->getProducer();
-  const auto* ir_output = op.getOutput(0);
-
-  auto ir_weights_op = dynamic_cast<const ops::ConstantOp*>(ir_weights->getNode());
-  if (ir_weights_op == nullptr)
-    throw AclCppException("Unsupported operation type");
-
-  const auto& ir_weights_tensor = ir_weights_op->getValue();
-  assert(ir_weights_tensor.getShape().rank() == 1);
-
-  // Get the input node tensor id in the DOM.
-  shared_ptr<ArtifactId> input = AF::id(tensorName(ir_input));
-
-  const string output_tensor_name = tensorName(ir_output);
-
-  shared_ptr<ArtifactId> transposed_input;
-  Shape transposed_output_shape;
-  shared_ptr<ArtifactId> transposed_output;
-
-  // Create the output tensor in the DOM and obtain its identifier.
-  const Shape& out_shape = ir_output->getShape();
-  const string transposed_output_name = output_tensor_name + "_transposed_output";
-
-  switch (out_shape.rank()) {
-    case 4: {
-      // transpose input to NCHW format supported by ACL
-      const string transposed_input_name = output_tensor_name + "_transposed_input";
-      transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
-      transposed_input = genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), input);
-
-      transposed_output = genTensor(transposed_output_name, transposed_output_shape);
-      break;
-    }
-    case 2:
-    case 1:
-      transposed_output_shape = out_shape;
-      transposed_input = input;
-      transposed_output = genTensor(tensorName(ir_output), out_shape);
-      break;
-    default:
-      throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
-  }
-
-  // Prefix used for the name of variables related to the operation implementation.
-  string layer_name = transposed_output->name() + "_bias_add_layer";
-
-  // Reshape the IR biases tensor and generate the corresponding DOM tensor.
-  const auto& ir_input_shape = ir_input->getShape();
-  Shape ir_biases_shape(ir_input_shape.rank());
-
-  // ACL CLArithmeticAddition supports input tensors broadcasting.
-  for (int i = 0; i < ir_input_shape.rank(); ++i)
-    ir_biases_shape.dim(i) = 1;
-
-  ir_biases_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
-  auto biases = genTensor(layer_name + "_biases", ir_biases_shape);
-
-  // Instantiate the CLArithmeticAddition object.
-  auto layer = genLayer("arm_compute::CLArithmeticAddition", layer_name,
-                        {AF::ref(transposed_input), AF::ref(biases), AF::ref(transposed_output),
-                         AF::lit("arm_compute::ConvertPolicy::WRAP")});
-
-  addToPersistentTensors(biases);
-  // Save the IR biases tensor to later read this in the artifact.
-  serializeTensor(biases, ir_weights_tensor);
-  genTensorAllocation(_infBlock, transposed_output);
-  genLayerExecution(layer);
-
-  if (out_shape.rank() == 4) {
-    // Generate output in NHWC format
-    shared_ptr<ArtifactId> output =
-        genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
-
-    genTensorDeallocation(_infBlock, transposed_input);
-    genTensorDeallocation(_infBlock, transposed_output);
-  }
-}
-
 void AclCppOpGenerator::visit(ops::InputOp& op) {
   shared_ptr<ArtifactId> tensor;
   tensor = genTensor(op.getOutput(0));
@@ -528,101 +444,6 @@ void AclCppOpGenerator::visit(ops::ReshapeOp& op) {
   genLayerExecution(layer);
 }

-void AclCppOpGenerator::visit(ops::ScaleOp& op) {
-  // May be not a perfect implementation, using the CLPixelWiseMultiplication ACL function taking
-  // two input tensors with the same shapes.
-  assert(op.getNumInputs() == 2);
-  const auto* ir_input = op.getInput(0)->getProducer();
-  const auto* ir_weights = op.getInput(1)->getProducer();
-  const auto* ir_output = op.getOutput(0);
-
-  auto ir_weights_op = dynamic_cast<const ops::ConstantOp*>(ir_weights->getNode());
-  if (ir_weights_op == nullptr)
-    throw AclCppException("Unsupported operation type");
-
-  const auto& ir_weights_tensor = ir_weights_op->getValue();
-  assert(ir_weights_tensor.getShape().rank() == 1);
-
-  // Get input tensor identifier in the generated artifact.
-  auto input = AF::id(tensorName(ir_input));
-
-  const string output_tensor_name = tensorName(ir_output);
-
-  // transpose input to NCHW format supported by ACL
-  const string transposed_input_name = output_tensor_name + "_transposed_input";
-  shared_ptr<ArtifactId> transposed_input =
-      genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), input);
-
-  // Create the output tensor in the DOM and obtain its identifier.
-  const Shape& out_shape = ir_output->getShape();
-  Shape transposed_output_shape;
-  switch (out_shape.rank()) {
-    case 4:
-      transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
-      break;
-    case 2:
-    case 1:
-      transposed_output_shape = out_shape;
-      break;
-    default:
-      throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
-  }
-
-  const string transposed_output_name = output_tensor_name + "_transposed_output";
-  shared_ptr<ArtifactId> transposed_output =
-      genTensor(transposed_output_name, transposed_output_shape);
-
-  auto operation_name = transposed_output->name() + "_scale_layer";
-
-  // Reshape the IR scales tensor and generate the corresponding DOM tensor.
-  const Shape ir_input_shape = transposeShape<0, 3, 1, 2>(ir_input->getShape());
-  Shape ir_scales_shape(ir_input_shape.rank());
-
-  // ACL CLArithmeticDivision supports input tensors broadcasting.
-  for (int i = 0; i < ir_input_shape.rank(); ++i)
-    ir_scales_shape.dim(i) = 1;
-
-  ir_scales_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
-  auto scales = genTensor(operation_name + "_scales", ir_scales_shape);
-
-  // We do not use the genMultiplication() function here because the input needs broadcasting.
-
-  // Create a unit tensor in the DOM.
-  auto unit = genTensor(operation_name + "_unit", ir_input_shape);
-
-  // Create a tmp tensor in the DOM to store the result of 1 / scale.
-
-  auto tmp = genTensor(operation_name + "_tmp", ir_input_shape);
-
-  // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
-  auto layer1 = genLayer("arm_compute::CLArithmeticDivision",
-                         operation_name + "_arithmetic_div_layer_1",
-                         {AF::ref(unit), AF::ref(scales), AF::ref(tmp)});
-  genLayerExecution(layer1);
-
-  // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
-  auto layer2 = genLayer("arm_compute::CLArithmeticDivision",
-                         operation_name + "_arithmetic_div_layer_2",
-                         {AF::ref(transposed_input), AF::ref(tmp), AF::ref(transposed_output)});
-
-  addToPersistentTensors(scales);
-  // Save the IR scales tensor to later read this in the artifact.
-  serializeTensor(scales, ir_weights_tensor);
-  addToPersistentTensors(unit);
-  // Fill the unit tensor with the 1 value.
-  fillTensor(unit, "1");
-  addToPersistentTensors(tmp);
-  genTensorAllocation(_infBlock, transposed_output);
-  genLayerExecution(layer2);
-
-  // Generate output in NHWC format
-  shared_ptr<ArtifactId> output =
-      genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
-
-  genTensorDeallocation(_infBlock, transposed_input);
-  genTensorDeallocation(_infBlock, transposed_output);
-}
-
 void AclCppOpGenerator::visit(mir::ops::SliceOp& /*op*/) {
   throw AclCppException("Unimplemented operation: SliceOp");
 }
diff --git a/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.h b/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.h
index ebcb63f..a0e55cc 100644
--- a/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.h
+++ b/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.h
@@ -48,7 +48,6 @@ public:
   /**
    * @param op
    */
   void visit(mir::ops::BatchNormOp& op) override;
-  void visit(mir::ops::BiasAddOp& op) override;
   void visit(mir::ops::CappedReluOp& op) override;
   void visit(mir::ops::ConcatOp& op) override;
   void visit(mir::ops::ConstantOp& op) override;
@@ -70,7 +69,6 @@ public:
   void visit(mir::ops::ReluOp& op) override;
   void visit(mir::ops::ReshapeOp& op) override;
   void visit(mir::ops::ResizeOp& op) override;
-  void visit(mir::ops::ScaleOp& op) override;
   void visit(mir::ops::SigmoidOp& op) override;
   void visit(mir::ops::SliceOp& op) override;
   void visit(mir::ops::SoftmaxOp& op) override;
diff --git a/compiler/nnc/passes/interpreter/Interpreter.cpp b/compiler/nnc/passes/interpreter/Interpreter.cpp
index b7a2367..a93680c 100644
--- a/compiler/nnc/passes/interpreter/Interpreter.cpp
+++ b/compiler/nnc/passes/interpreter/Interpreter.cpp
@@ -17,7 +17,6 @@
 #include "passes/interpreter/Interpreter.h"

 #include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -38,7 +37,6 @@
 #include "mir/ops/ReduceOp.h"
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SliceOp.h"
 #include "mir/ops/SoftmaxOp.h"
@@ -48,7 +46,6 @@
 #include "mir/ops/TransposeOp.h"

 #include "ops/BatchNorm.h"
-#include "ops/Bias.h"
 #include "ops/Concat.h"
 #include "ops/Conv2D.h"
 #include "ops/DeConv2D.h"
@@ -62,14 +59,12 @@
 #include "ops/Pool.h"
 #include "ops/Reduce.h"
 #include "ops/Reshape.h"
-#include "ops/Scale.h"
 #include "ops/Softmax.h"
 #include "ops/Transpose.h"
 #include "ops/common.h"

 #include
 #include
-#include
 #include
 #include

@@ -186,24 +181,12 @@ void NNInterpreter::visit(ops::DepthwiseConv2DOp& op) {
   setOutputTensors(op, std::move(outputs));
 }

-void NNInterpreter::visit(ops::BiasAddOp& op) {
-  auto inputs = getInputTensors(op);
-  auto outputs = BiasAdd(inputs[0], inputs[1])();
-  setOutputTensors(op, std::move(outputs));
-}
-
 void NNInterpreter::visit(ops::BatchNormOp& op) {
   auto inputs = getInputTensors(op);
   auto outputs = BatchNorm(inputs[0], op)();
   setOutputTensors(op, std::move(outputs));
 }

-void NNInterpreter::visit(ops::ScaleOp& op) {
-  auto inputs = getInputTensors(op);
-  auto outputs = Scale(inputs[0], inputs[1])();
-  setOutputTensors(op, std::move(outputs));
-}
-
 void NNInterpreter::visit(ops::SliceOp& op) {
   auto inputs = getInputTensors(op);
   auto input = Tensor(inputs[0]);
diff --git a/compiler/nnc/passes/interpreter/ops/Bias.cpp b/compiler/nnc/passes/interpreter/ops/Bias.cpp
deleted file mode 100644
index c02943d..0000000
--- a/compiler/nnc/passes/interpreter/ops/Bias.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Bias.h"
-#include "Fill.h"
-
-namespace nnc {
-
-BiasAdd::BiasAdd(const mir::TensorVariant& in1, const mir::TensorVariant& in2)
-    : _input1(in1), _input2(in2) {
-  assert(_input2.getShape().rank() == 1);
-  assert(_input1.getShape().dim(-1) == _input2.getShape().dim(0));
-}
-
-std::vector<mir::TensorVariant> BiasAdd::operator()() {
-  return Fill(_input1.getShape(), [this](const mir::Index& idx) {
-    return _input1.at(idx) + _input2.atOffset(idx.at(idx.rank() - 1));
-  })();
-}
-
-} // namespace nnc
diff --git a/compiler/nnc/passes/interpreter/ops/Bias.h b/compiler/nnc/passes/interpreter/ops/Bias.h
deleted file mode 100644
index 3ca8f50..0000000
--- a/compiler/nnc/passes/interpreter/ops/Bias.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NNC_BACKEND_INTERPRETER_BIAS_
-#define _NNC_BACKEND_INTERPRETER_BIAS_
-
-#include "OperationImpl.h"
-#include "mir/Tensor.h"
-
-namespace nnc {
-
-class BiasAdd : public OperationImpl<float> {
-public:
-  BiasAdd(const mir::TensorVariant& in1, const mir::TensorVariant& in2);
-
-  std::vector<mir::TensorVariant> operator()() override;
-
-private:
-  const mir::Tensor<float> _input1;
-  const mir::Tensor<float> _input2;
-};
-
-} // namespace nnc
-
-#endif //_NNC_BACKEND_INTERPRETER_BIAS_
diff --git a/compiler/nnc/passes/interpreter/ops/Scale.cpp b/compiler/nnc/passes/interpreter/ops/Scale.cpp
deleted file mode 100644
index 1316c4d..0000000
--- a/compiler/nnc/passes/interpreter/ops/Scale.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Scale.h"
-#include "Fill.h"
-
-namespace nnc {
-
-Scale::Scale(const mir::TensorVariant& in1, const mir::TensorVariant& in2)
-    : _input1(in1), _input2(in2) {
-  assert(_input2.getShape().rank() == 1);
-  assert(_input1.getShape().dim(-1) == _input2.getShape().dim(0));
-}
-
-std::vector<mir::TensorVariant> Scale::operator()() {
-  return Fill(_input1.getShape(), [this](const mir::Index& idx) {
-    return _input1.at(idx) * _input2.atOffset(idx.at(idx.rank() - 1));
-  })();
-}
-
-} // namespace nnc
diff --git a/compiler/nnc/passes/interpreter/ops/Scale.h b/compiler/nnc/passes/interpreter/ops/Scale.h
deleted file mode 100644
index ca6565c..0000000
--- a/compiler/nnc/passes/interpreter/ops/Scale.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NNC_CORE_BACKEND_INTERPRETER_SCALE_IMPL_
-#define _NNC_CORE_BACKEND_INTERPRETER_SCALE_IMPL_
-
-#include "OperationImpl.h"
-#include "mir/Tensor.h"
-
-namespace nnc {
-
-class Scale : public OperationImpl<float> {
-public:
-  Scale(const mir::TensorVariant& in1, const mir::TensorVariant& in2);
-
-  std::vector<mir::TensorVariant> operator()() override;
-
-private:
-  const mir::Tensor<float> _input1;
-  const mir::Tensor<float> _input2;
-};
-
-} // namespace nnc
-
-#endif // _NNC_CORE_BACKEND_INTERPRETER_SCALE_IMPL_
diff --git a/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp b/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
index d615a6c..cf9ca91 100644
--- a/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
+++ b/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
@@ -16,10 +16,10 @@
 #include "passes/optimizations/FuseArithmeticOps.h"
 #include "passes/optimizations/OptimizationUtils.h"
-#include "mir/ops/BiasAddOp.h"
+#include "mir/ops/AddOp.h"
 #include "mir/ops/ConstantOp.h"
 #include "mir/ops/Conv2DOp.h"
-#include "mir/ops/ScaleOp.h"
+#include "mir/ops/MulOp.h"
 #include "mir/Graph.h"
 #include "mir/Tensor.h"
 #include "mir/Index.h"
@@ -27,7 +27,6 @@
 #include "mir/ShapeRange.h"

 #include
-#include

 namespace nnc {

@@ -41,12 +40,12 @@ using OpType = Operation::Type;
 using Edge = pair<Operation*, Operation*>;

 /**
- * This function used to get 'ConstantOp' with weights of 'BiasAddOp', 'ScaleOp' or 'Conv2DOp'
+ * This function is used to get 'ConstantOp' with the weights of 'AddOp', 'MulOp' or 'Conv2DOp'.
  * For each of these ops weights stored in second input node
  */
 ops::ConstantOp* getSecondInputAsConst(const Operation* op) {
-  assert(op->getType() == OpType::biasAdd ||
-         op->getType() == OpType::scale ||
+  assert(op->getType() == OpType::add ||
+         op->getType() == OpType::mul ||
          op->getType() == OpType::conv2D);
   return dynamic_cast<ops::ConstantOp*>(op->getInput(1)->getProducer()->getNode());
 }
@@ -106,54 +105,53 @@ Operation* mergeConstantOps(Graph* g, const ops::ConstantOp* const1_op,
    */
   float operand2 = const2_accessor.at(Index{idx.at(0)});
   switch (merge_type) {
-    case OpType::scale:
+    case OpType::mul:
       new_const_accessor.at(idx) = operand1 * operand2;
       break;
-    case OpType::biasAdd:
+    case OpType::add:
       new_const_accessor.at(idx) = operand1 + operand2;
       break;
     default:
-      assert(false && "only 'scale' and 'biasAdd' constants merge types supported");
+      assert(false && "only 'mul' and 'add' constants merge types supported");
   }
 }

 return g->create<ops::ConstantOp>(const1_op->getName(), new_const_val);
}

-// TODO: support 'DepthwiseConv'->'Scale' and 'DepthwiseConv'->'ElementwiseMul'
-// TODO: support 'ElementwiseAdd' and 'ElementwiseMul'
+// TODO: support 'DepthwiseConv'->'Mul'
 /**
  * This function fuses some successive operations with constant weights into one:
- * 'Bias'->'Bias' into 'Bias'; 'Scale'->'Scale' into 'Scale'; 'Conv'->'Scale' into 'Conv';
+ * 'Add'->'Add' into 'Add'; 'Mul'->'Mul' into 'Mul'; 'Conv'->'Mul' into 'Conv';
  * Before: | After:
- * -------------------------|--------------------------
- * [input] [Const1]         | [input] [Const1*Const2]
- *     \\ //                |     \\ //
- *   [Scale] [Const2]       |   [Scale]
- *       \\ //              |
- *     [Scale]              |
- * -------------------------|--------------------------
- * [input] [Const1]         | [input] [Const1+Const2]
- *     \\ //                |     \\ //
- *   [BiasAdd] [Const2]     |   [BiasAdd]
- *       \\ //              |
- *     [BiasAdd]            |
- * -------------------------|--------------------------
- * [input] [Const1]         | [input] [Const1*Const2]
- *     \\ //                |     \\ //
- *   [Conv2D] [Const2]      |   [Conv2D]
- *       \\ //              |
- *     [Scale]              |
+ * -------------------------|---------------------------
+ * [input] [Const1]         | [input] [Const1*Const2]
+ *     \\ //                |     \\ //
+ *   [Mul] [Const2]         |   [Mul]
+ *       \\ //              |
+ *     [Mul]                |
+ * -------------------------|---------------------------
+ * [input] [Const1]         | [input] [Const1+Const2]
+ *     \\ //                |     \\ //
+ *   [Add] [Const2]         |   [Add]
+ *       \\ //              |
+ *     [Add]                |
+ * -------------------------|---------------------------
+ * [input] [Const1]         | [input] [Const1*Const2]
+ *     \\ //                |     \\ //
+ *   [Conv2D] [Const2]      |   [Conv2D]
+ *       \\ //              |
+ *     [Mul]                |
  */
 bool fuseSuccessiveOps(Graph* g) {
   // Find all successive ops
   vector<Edge> successive_ops;
-  auto scale_scale_vec = findSuccessiveOpsWithConstWeights(g, OpType::scale, OpType::scale);
-  successive_ops.insert(successive_ops.end(), scale_scale_vec.begin(), scale_scale_vec.end());
-  auto bias_bias_vec = findSuccessiveOpsWithConstWeights(g, OpType::biasAdd, OpType::biasAdd);
-  successive_ops.insert(successive_ops.end(), bias_bias_vec.begin(), bias_bias_vec.end());
-  auto conv_scale_vec = findSuccessiveOpsWithConstWeights(g, OpType::conv2D, OpType::scale);
-  successive_ops.insert(successive_ops.end(), conv_scale_vec.begin(), conv_scale_vec.end());
+  auto mul_mul_vec = findSuccessiveOpsWithConstWeights(g, OpType::mul, OpType::mul);
+  successive_ops.insert(successive_ops.end(), mul_mul_vec.begin(), mul_mul_vec.end());
+  auto add_add_vec = findSuccessiveOpsWithConstWeights(g, OpType::add, OpType::add);
+  successive_ops.insert(successive_ops.end(), add_add_vec.begin(), add_add_vec.end());
+  auto conv_mul_vec = findSuccessiveOpsWithConstWeights(g, OpType::conv2D, OpType::mul);
+  successive_ops.insert(successive_ops.end(), conv_mul_vec.begin(), conv_mul_vec.end());

 for (auto& edge : successive_ops) {
   auto const1_op = getSecondInputAsConst(edge.first);
@@ -177,44 +175,43 @@ }

 /**
- * This function sinks 'BiasAdd' through 'Scale'
- * by multiplying 'BiasAdd' weights on 'Scale' weights
+ * This function sinks 'Add' through 'Mul'
+ * by multiplying 'Add' weights by 'Mul' weights
  * Before: | After:
 *--------------------------|--------------------------
- * [input] [Const1]         | [input] [Const2]
- *     \\ //                |     \\ //
- *   [BiasAdd] [Const2]     |   [Scale] [Const1*Const2]
- *       \\ //              |       \\ //
- *     [Scale]              |     [BiasAdd]
+ * [input] [Const1]         | [input] [Const2]
+ *     \\ //                |     \\ //
+ *   [Add] [Const2]         |   [Mul] [Const1*Const2]
+ *       \\ //              |       \\ //
+ *     [Mul]                |     [Add]
  *                          |
  */
-bool sinkBiasThroughScale(Graph* g) {
-  auto bias_scale_edges = findSuccessiveOpsWithConstWeights(g, OpType::biasAdd, OpType::scale);
+bool sinkAddThroughMul(Graph* g) {
+  auto add_mul_edges = findSuccessiveOpsWithConstWeights(g, OpType::add, OpType::mul);

-  for (auto& edge : bias_scale_edges) {
-    auto old_bias_op = edge.first;
-    auto old_scale_op = edge.second;
-    auto old_bias_const_op = getSecondInputAsConst(old_bias_op);
-    auto old_scale_const_op = getSecondInputAsConst(old_scale_op);
-    assert(old_bias_const_op && old_scale_const_op);
+  for (auto& edge : add_mul_edges) {
+    auto old_add_op = edge.first;
+    auto old_mul_op = edge.second;
+    auto old_add_const_op = getSecondInputAsConst(old_add_op);
+    auto old_mul_const_op = getSecondInputAsConst(old_mul_op);
+    assert(old_add_const_op && old_mul_const_op);

     // Create new operations
-    auto old_bias_input = old_bias_op->getInput(0)->getProducer();
-    auto new_scale_op = g->copyOpWithInputs(old_scale_op, {old_bias_input,
-                                                           old_scale_const_op->getOutput(0)});
-    auto new_bias_const_op = mergeConstantOps(g, old_bias_const_op, old_scale_const_op,
-                                              OpType::scale);
-    auto new_bias_op = g->copyOpWithInputs(old_bias_op, {new_scale_op->getOutput(0),
-                                                         new_bias_const_op->getOutput(0)});
+    auto old_add_input = old_add_op->getInput(0)->getProducer();
+    auto new_mul_op =
+        g->copyOpWithInputs(old_mul_op, {old_add_input, old_mul_const_op->getOutput(0)});
+    auto new_add_const_op = mergeConstantOps(g, old_add_const_op, old_mul_const_op, OpType::mul);
+    auto new_add_op = g->copyOpWithInputs(
+        old_add_op, {new_mul_op->getOutput(0), new_add_const_op->getOutput(0)});

-    // Replace old scale with new bias and remove old nodes
-    g->replaceNode(old_scale_op, new_bias_op);
-    removeNodeIfUnused(g, old_bias_op);
-    removeNodeIfUnused(g, old_bias_const_op);
+    // Replace old mul with new add and remove old nodes
+    g->replaceNode(old_mul_op, new_add_op);
+    removeNodeIfUnused(g, old_add_op);
+    removeNodeIfUnused(g, old_add_const_op);
   }

-  // If there is no bias-scale edges - graph wasn't changed
-  return !bias_scale_edges.empty();
+  // If there are no add-mul edges, the graph wasn't changed
+  return !add_mul_edges.empty();
 }

 } // unnamed namespace
@@ -226,7 +223,7 @@ nnc::PassData nnc::FuseArithmeticOps::run(nnc::PassData data) {
   while (graph_changed) {
     graph_changed = false;
     graph_changed |= fuseSuccessiveOps(g);
-    graph_changed |= sinkBiasThroughScale(g);
+    graph_changed |= sinkAddThroughMul(g);
   }

   return g;
diff --git a/compiler/nnc/passes/soft_backend/CPPGenerator.cpp b/compiler/nnc/passes/soft_backend/CPPGenerator.cpp
index 4ee9cca..f8d156d 100644
--- a/compiler/nnc/passes/soft_backend/CPPGenerator.cpp
+++ b/compiler/nnc/passes/soft_backend/CPPGenerator.cpp
@@ -30,7 +30,6 @@ using namespace std;
 #include "CommonData.generated.h"
 #include "eigen.generated.h"
 #include "cpp_common_funcs.generated.h"
-#include "cpp_add_bias.generated.h"
 #include "cpp_capped_relu.generated.h"
 #include "cpp_concat.generated.h"
 #include "cpp_conv.generated.h"
@@ -45,7 +44,6 @@ using namespace std;
 #include "cpp_reduce.generated.h"
 #include "cpp_resize.generated.h"
 #include "cpp_softmax.generated.h"
-#include "cpp_scale.generated.h"
 #include "cpp_slice.generated.h"
 #include "cpp_dropout.generated.h"
 #include "cpp_batchnorm.generated.h"
@@ -292,7 +290,6 @@ void CPPCodeGenerator::materializeCode(ostream& out, const ModelAnalyzer& ma, co
   out.write(CommonData, sizeof(CommonData));
   out.write(cpp_common_funcs, sizeof(cpp_common_funcs));
-  out.write(cpp_add_bias, sizeof(cpp_add_bias));
   out.write(cpp_capped_relu, sizeof(cpp_capped_relu));
   out.write(cpp_concat, sizeof(cpp_concat));
   out.write(cpp_conv, sizeof(cpp_conv));
@@ -317,7 +314,6 @@ void CPPCodeGenerator::materializeCode(ostream& out, const ModelAnalyzer& ma, co
   // Operations calls into all of the above
   out.write(cpp_operations, sizeof(cpp_operations));
   // Below call into operations
-  out.write(cpp_scale, sizeof(cpp_scale));
   out.write(cpp_dropout, sizeof(cpp_dropout));
   out.write(cpp_batchnorm, sizeof(cpp_batchnorm));
   out.write(cpp_leaky_relu, sizeof(cpp_leaky_relu));
diff --git a/compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp b/compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp
index 4dbcc9d..7ea1f4a 100644
--- a/compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp
+++ b/compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp
@@ -17,11 +17,9 @@
 #include "ModelAnalyzer.h"

 #include "mir/Shape.h"
-#include "mir/ShapeRange.h"
 #include "mir/Graph.h"

 #include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -43,7 +41,6 @@
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
 #include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SliceOp.h"
 #include "mir/ops/SoftmaxOp.h"
@@ -353,10 +350,6 @@ void ModelAnalyzer::visit(ops::CappedReluOp& op) {
   appendOperationToInference(&op, "cappedRelu");
 }

-void ModelAnalyzer::visit(ops::BiasAddOp& op) {
-  appendOperationToInference(&op, "biasAdd");
-}
-
 void ModelAnalyzer::visit(ops::InputOp& op) {
   assert(op.getNumInputs() == 0);
   appendOperationToInference(&op, "in");
@@ -396,10 +389,6 @@ void ModelAnalyzer::visit(ops::DropoutOp& op) {
   appendOperationToInference(&op, "dropout");
 }

-void ModelAnalyzer::visit(ops::ScaleOp& op) {
-  appendOperationToInference(&op, "scale");
-}
-
 void ModelAnalyzer::visit(mir::ops::SliceOp& op) {
   appendOperationToInference(&op, "slice");
 }
diff --git a/compiler/nnc/passes/soft_backend/ModelAnalyzer.h b/compiler/nnc/passes/soft_backend/ModelAnalyzer.h
index a7da627..0dfb537 100644
--- a/compiler/nnc/passes/soft_backend/ModelAnalyzer.h
+++ b/compiler/nnc/passes/soft_backend/ModelAnalyzer.h
@@ -47,7 +47,6 @@ public:
   void analyze(const mir::Graph* g);

   void visit(mir::ops::BatchNormOp& op) override;
-  void visit(mir::ops::BiasAddOp& op) override;
   void visit(mir::ops::CappedReluOp& op) override;
   void visit(mir::ops::ConcatOp& op) override;
   void visit(mir::ops::ConstantOp& op) override;
@@ -69,7 +68,6 @@ public:
   void visit(mir::ops::ReluOp& op) override;
   void visit(mir::ops::ReshapeOp& op) override;
   void visit(mir::ops::ResizeOp& op) override;
-  void visit(mir::ops::ScaleOp& op) override;
   void visit(mir::ops::SigmoidOp& op) override;
   void visit(mir::ops::SliceOp& op) override;
   void visit(mir::ops::SoftmaxOp& op) override;
diff --git a/compiler/nnc/passes/soft_backend/SBSerializer.cpp b/compiler/nnc/passes/soft_backend/SBSerializer.cpp
index f275745..07d1bf3 100644
--- a/compiler/nnc/passes/soft_backend/SBSerializer.cpp
+++ b/compiler/nnc/passes/soft_backend/SBSerializer.cpp
@@ -21,7 +21,6 @@
 #include "CommonData.def"

 #include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -41,7 +40,6 @@
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
"mir/ops/ResizeOp.h" -#include "mir/ops/ScaleOp.h" #include "mir/ops/SliceOp.h" #include "mir/ops/SoftmaxOp.h" #include "mir/ops/SqueezeOp.h" @@ -220,11 +218,6 @@ void Serializer::visit(ops::CappedReluOp& op) { serializeT(op.getCap()); } -void Serializer::visit(ops::BiasAddOp& /*op*/) { - _curOp->paramStartOffset = _buffer.size(); - // no parameters to dump -} - void Serializer::visit(ops::InputOp& /*op*/) { // no parameters to dump } @@ -251,11 +244,6 @@ void Serializer::visit(ops::BatchNormOp& op) { serializeT(op.getSpatial()); } -void Serializer::visit(ops::ScaleOp& /*op*/) { - _curOp->paramStartOffset = _buffer.size(); - // no parameters to dump -} - void Serializer::visit(mir::ops::SliceOp& op) { _curOp->paramStartOffset = _buffer.size(); serializeShape(op.getStarts()); diff --git a/compiler/nnc/passes/soft_backend/SBSerializer.h b/compiler/nnc/passes/soft_backend/SBSerializer.h index a9a84be..7c5c98e 100644 --- a/compiler/nnc/passes/soft_backend/SBSerializer.h +++ b/compiler/nnc/passes/soft_backend/SBSerializer.h @@ -41,7 +41,6 @@ class Serializer : public mir::IVisitor { public: void visit(mir::ops::BatchNormOp& op) override; - void visit(mir::ops::BiasAddOp& op) override; void visit(mir::ops::CappedReluOp& op) override; void visit(mir::ops::ConcatOp& op) override; void visit(mir::ops::ConstantOp& op) override; @@ -63,7 +62,6 @@ public: void visit(mir::ops::ReluOp& op) override; void visit(mir::ops::ReshapeOp& op) override; void visit(mir::ops::ResizeOp& op) override; - void visit(mir::ops::ScaleOp& op) override; void visit(mir::ops::SigmoidOp& op) override; void visit(mir::ops::SliceOp& op) override; void visit(mir::ops::SoftmaxOp& op) override; diff --git a/compiler/nnc/passes/soft_backend/code_snippets/cpp_add_bias.def b/compiler/nnc/passes/soft_backend/code_snippets/cpp_add_bias.def deleted file mode 100644 index 903c7a8..0000000 --- a/compiler/nnc/passes/soft_backend/code_snippets/cpp_add_bias.def +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/
-
-inline void AddBiasAndEvalActivationFunction(const float* bias_data,
-                                             const RuntimeShape& bias_shape,
-                                             float* array_data,
-                                             const RuntimeShape& array_shape) {
-#ifdef USE_NEON
-  const int bias_size = bias_shape.FlatSize();
-  const int array_size = array_shape.FlatSize();
-  TFLITE_DCHECK_EQ((array_size % bias_size), 0);
-  float* array_ptr = array_data;
-  float* array_end_ptr = array_ptr + array_size;
-  for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
-    int i = 0;
-    for (; i <= bias_size - 16; i += 16) {
-      auto b0 = vld1q_f32(bias_data + i);
-      auto b1 = vld1q_f32(bias_data + i + 4);
-      auto b2 = vld1q_f32(bias_data + i + 8);
-      auto b3 = vld1q_f32(bias_data + i + 12);
-      auto a0 = vld1q_f32(array_ptr + i);
-      auto a1 = vld1q_f32(array_ptr + i + 4);
-      auto a2 = vld1q_f32(array_ptr + i + 8);
-      auto a3 = vld1q_f32(array_ptr + i + 12);
-      auto x0 = vaddq_f32(a0, b0);
-      auto x1 = vaddq_f32(a1, b1);
-      auto x2 = vaddq_f32(a2, b2);
-      auto x3 = vaddq_f32(a3, b3);
-      vst1q_f32(array_ptr + i, x0);
-      vst1q_f32(array_ptr + i + 4, x1);
-      vst1q_f32(array_ptr + i + 8, x2);
-      vst1q_f32(array_ptr + i + 12, x3);
-    }
-    for (; i <= bias_size - 4; i += 4) {
-      auto b = vld1q_f32(bias_data + i);
-      auto a = vld1q_f32(array_ptr + i);
-      auto x = vaddq_f32(a, b);
-      vst1q_f32(array_ptr + i, x);
-    }
-    for (; i < bias_size; i++) {
-      array_ptr[i] = array_ptr[i] + bias_data[i];
-    }
-  }
-#else // not NEON
-
-  assert(bias_shape.DimensionsCount() == 1 &&
-         bias_shape.Dims(0) == array_shape.Dims(array_shape.DimensionsCount() - 1));
-  const auto bias_vec = MapAsVector(bias_data, bias_shape);
-  auto out_mat = MapAsMatrixWithLastDimAsRows(array_data, array_shape);
-  out_mat.colwise() += bias_vec;
-#endif
-}
diff --git a/compiler/nnc/passes/soft_backend/code_snippets/cpp_operations.def b/compiler/nnc/passes/soft_backend/code_snippets/cpp_operations.def
index 2d77cb9..2148361 100644
--- a/compiler/nnc/passes/soft_backend/code_snippets/cpp_operations.def
+++ b/compiler/nnc/passes/soft_backend/code_snippets/cpp_operations.def
@@ -413,15 +413,6 @@ void cappedRelu(Tensor &out, const char *params, const Tensor &in)
   CappedRelu(input, input_d, cap, out.getData(), input_d);
 }

-void biasAdd(Tensor& out, const char* params, const Tensor& in1, const Tensor& in2)
-{
-  out.reshape(in1.getShape());
-  out.fillData(in1.getData(), in1.getShape().getNumElems());
-
-  AddBiasAndEvalActivationFunction(in2.getData(), shapeToRuntimeShape(in2.getShape()),
-                                   out.getData(), shapeToRuntimeShape(out.getShape()));
-}
-
 void slice(Tensor& out, const char* params, const Tensor& in) {
   Shape starts = deserializeShape(params);
   Shape sizes = deserializeShape(params);
diff --git a/compiler/nnc/passes/soft_backend/code_snippets/cpp_scale.def b/compiler/nnc/passes/soft_backend/code_snippets/cpp_scale.def
deleted file mode 100644
index c380efa..0000000
--- a/compiler/nnc/passes/soft_backend/code_snippets/cpp_scale.def
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-void scale(Tensor& out, const char* params, const Tensor& in1, const Tensor& in2) {
-  const auto in1_shape = shapeToRuntimeShape(in1.getShape());
-  const auto in2_shape = shapeToRuntimeShape(in2.getShape());
-  assert(in2_shape.DimensionsCount() == 1 &&
-         in2_shape.Dims(0) == in1_shape.Dims(in1_shape.DimensionsCount() - 1));
-
-  out.reshape(in1.getShape());
-
-#ifdef USE_NEON
-  const int scale_size = in2_shape.FlatSize();
-  const int array_size = in1_shape.FlatSize();
-  TFLITE_DCHECK_EQ((array_size % scale_size), 0);
-  out.fillData(in1.getData(), array_size);
-  float* array_ptr = out.getData();
-  const float* scale_ptr = in2.getData();
-  float* array_end_ptr = array_ptr + array_size;
-  for (; array_ptr != array_end_ptr; array_ptr += scale_size) {
-    int i = 0;
-    for (; i <= scale_size - 16; i += 16) {
-      auto b0 = vld1q_f32(scale_ptr + i);
-      auto b1 = vld1q_f32(scale_ptr + i + 4);
-      auto b2 = vld1q_f32(scale_ptr + i + 8);
-      auto b3 = vld1q_f32(scale_ptr + i + 12);
-      auto a0 = vld1q_f32(array_ptr + i);
-      auto a1 = vld1q_f32(array_ptr + i + 4);
-      auto a2 = vld1q_f32(array_ptr + i + 8);
-      auto a3 = vld1q_f32(array_ptr + i + 12);
-      auto x0 = vmulq_f32(a0, b0);
-      auto x1 = vmulq_f32(a1, b1);
-      auto x2 = vmulq_f32(a2, b2);
-      auto x3 = vmulq_f32(a3, b3);
-      vst1q_f32(array_ptr + i, x0);
-      vst1q_f32(array_ptr + i + 4, x1);
-      vst1q_f32(array_ptr + i + 8, x2);
-      vst1q_f32(array_ptr + i + 12, x3);
-    }
-    for (; i <= scale_size - 4; i += 4) {
-      auto b = vld1q_f32(scale_ptr + i);
-      auto a = vld1q_f32(array_ptr + i);
-      auto x = vmulq_f32(a, b);
-      vst1q_f32(array_ptr + i, x);
-    }
-    for (; i < scale_size; i++) {
-      array_ptr[i] = array_ptr[i] * scale_ptr[i];
-    }
-  }
-#else // not NEON
-
-  const auto out_shape = shapeToRuntimeShape(out.getShape());
-
-  const auto in1_mat = MapAsMatrixWithLastDimAsRows(in1.getData(), in1_shape);
-  const auto in2_vec = MapAsVector(in2.getData(), in2_shape);
-  auto out_mat = MapAsMatrixWithLastDimAsRows(out.getData(), out_shape);
-  out_mat.colwise() = in2_vec;
-  out_mat.array() = out_mat.array() * in1_mat.array();
-#endif
-}
diff --git a/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp b/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
index 1e75f81..ea25f0a 100644
--- a/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
+++ b/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
@@ -28,8 +28,6 @@
 // MIR
 #include "mir/Graph.h"
-#include "mir/ops/BiasAddOp.h"
-#include "mir/ops/CappedReluOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -46,7 +44,6 @@
 #include "mir/ops/ReduceOp.h"
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SoftmaxOp.h"
 #include "mir/ops/TanhOp.h"
@@ -229,51 +226,6 @@ TEST(acl_backend_mir_to_dom, constant) {
   checkDomStructure(m, {}, {});
 }

-TEST(acl_backend_mir_to_dom, bias) {
-  const int32_t channels = 2;
-  TensorVariant w = createTensorVariant({channels});
-
-  Graph g;
-  OpConstructor op_generator = [&w](Graph& g, const vector<Operation::Output*>& inputs) {
-    auto bias = g.create<ops::ConstantOp>("", w)->getOutput(0);
-    return g.create<ops::BiasAddOp>("bias", inputs[0], bias);
-  };
-  vector<Shape> input_shapes{{1, 10, 10, channels}};
-
-  fillGraph(g, op_generator, input_shapes);
-
-  stringstream params_out;
-  AclCppOpGenerator dom_gen(artifactName, params_out);
-
-  const ArtifactModule& m = dom_gen.generate(&g);
-
-  checkDomStructure(m, {}, {});
-}
-
-TEST(acl_backend_mir_to_dom, scale) {
-  const int32_t channels = 2;
-  TensorVariant w = createTensorVariant({channels});
-
-  Graph g;
-  OpConstructor op_generator = [&w](Graph& g, const vector<Operation::Output*>& inputs) {
-    auto scale = g.create<ops::ConstantOp>("", w)->getOutput(0);
-    return g.create<ops::ScaleOp>("scale", inputs[0], scale);
-  };
-  vector<Shape> input_shapes{{1, 10, 10, channels}};
-
-  fillGraph(g, op_generator, input_shapes);
-
-  stringstream params_out;
-  AclCppOpGenerator dom_gen(artifactName, params_out);
-
-  const ArtifactModule& m = dom_gen.generate(&g);
-
-  checkDomStructure(m, {}, {});
-
-  stringstream code_out;
-  ArtifactGeneratorCppCode code_gen(code_out);
-}
-
 TEST(acl_backend_mir_to_dom, concat) {
   Graph g;
   OpConstructor op_generator = [](Graph& g, const vector<Operation::Output*>& inputs) {
diff --git a/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp b/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp
index f8bbd3b..e991ce8 100644
--- a/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp
+++ b/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp
@@ -17,10 +17,10 @@
 #include "passes/optimizations/FuseArithmeticOps.h"
 #include "Util.h"
 #include "mir/Graph.h"
-#include "mir/ops/BiasAddOp.h"
+#include "mir/ops/AddOp.h"
 #include "mir/ops/ConstantOp.h"
 #include "mir/ops/Conv2DOp.h"
-#include "mir/ops/ScaleOp.h"
+#include "mir/ops/MulOp.h"

 #include
 #include

@@ -40,15 +40,15 @@ TEST(OptPass, fuseConvBiasScaleScaleBias) {
   auto conv = g.create<ops::Conv2DOp>("conv", input->getOutput(0), conv_const->getOutput(0),
                                       Shape{1, 1}, padding, padding);
   auto bias1_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  auto bias1 = g.create<ops::BiasAddOp>("bias1", conv->getOutput(0), bias1_const->getOutput(0));
+  auto bias1 = g.create<ops::AddOp>("bias1", conv->getOutput(0), bias1_const->getOutput(0));
   auto scale1_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  auto scale1 = g.create<ops::ScaleOp>("scale1", bias1->getOutput(0), scale1_const->getOutput(0));
+  auto scale1 = g.create<ops::MulOp>("scale1", bias1->getOutput(0), scale1_const->getOutput(0));
   auto scale2_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  auto scale2 = g.create<ops::ScaleOp>("scale2", scale1->getOutput(0), scale2_const->getOutput(0));
+  auto scale2 = g.create<ops::MulOp>("scale2", scale1->getOutput(0), scale2_const->getOutput(0));
   auto scale3_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  auto scale3 = g.create<ops::ScaleOp>("scale3", scale2->getOutput(0), scale3_const->getOutput(0));
+  auto scale3 = g.create<ops::MulOp>("scale3", scale2->getOutput(0), scale3_const->getOutput(0));
   auto bias2_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  g.create<ops::BiasAddOp>("", scale3->getOutput(0), bias2_const->getOutput(0));
+  g.create<ops::AddOp>("", scale3->getOutput(0), bias2_const->getOutput(0));

   // Check that layout is desired
   std::stringstream ss;
diff --git a/compiler/nnc/unittests/optimizations/Util.h b/compiler/nnc/unittests/optimizations/Util.h
index 6abe8af..da5c9a6 100644
--- a/compiler/nnc/unittests/optimizations/Util.h
+++ b/compiler/nnc/unittests/optimizations/Util.h
@@ -24,8 +24,8 @@
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/OutputOp.h"
 #include "mir/ops/PoolOp.h"
-#include "mir/ops/BiasAddOp.h"
-#include "mir/ops/ScaleOp.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/MulOp.h"
 #include "mir/ops/Conv2DOp.h"
 #include "mir/Visitor.h"

@@ -39,9 +39,9 @@ public:

   void visit(mir::ops::TanhOp& op) override { _s << "th_" << op.getName() << "."; }

-  void visit(mir::ops::ScaleOp& op) override { _s << "s_" << op.getName() << "."; }
+  void visit(mir::ops::MulOp& op) override { _s << "s_" << op.getName() << "."; }

-  void visit(mir::ops::BiasAddOp& op) override { _s << "b_" << op.getName() << "."; }
+  void visit(mir::ops::AddOp& op) override { _s << "b_" << op.getName() << "."; }

   void visit(mir::ops::ReluOp& op) override { _s << "r_" << op.getName() << "."; }

diff --git a/compiler/nnc/unittests/soft_backend/CPPOperations.cpp b/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
index 61fe5b9..e8c7224 100644
--- a/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
+++ b/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
@@ -26,7 +26,6 @@
 #include "code_snippets/cpp_header_types.def"
 #include "code_snippets/cpp_common_funcs.def"

-#include "code_snippets/cpp_add_bias.def"
 #include "code_snippets/cpp_capped_relu.def"
 #include "code_snippets/cpp_concat.def"
 #include "code_snippets/cpp_conv.def"
@@ -50,7 +49,6 @@
 #include "code_snippets/cpp_transpose.def"

 #include "code_snippets/cpp_operations.def"
-#include "code_snippets/cpp_scale.def"
 #include "code_snippets/cpp_leaky_relu.def"

 // soft backend part
@@ -59,7 +57,6 @@
 #include "SBSerializer.h"

 // operations part
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/Conv2DOp.h"
@@ -77,7 +74,6 @@
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
 #include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SliceOp.h"
 #include "mir/ops/SoftmaxOp.h"
@@ -328,38 +324,6 @@ void createAndRunTestGraph(
 }

-TEST(cpp_operations_test, bias) {
-  vector<int> input_shape_data{2, 3, 4, 5};
-  vector<int> weights_shape_data{5};
-  vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
-  Tensor input_atensor0;
-  Tensor input_atensor1;
-  fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
-  fillTensors(input_ntensors[1], input_atensor1, weights_shape_data, 1.0f);
-
-  auto op_generator = [](mir::Graph& g, const std::vector<mir::Operation::Output*>& inputs) {
-    return g.create<mir::ops::BiasAddOp>("y", inputs[0], inputs[1]);
-  };
-
-  createAndRunTestGraph(op_generator, biasAdd, input_ntensors, input_atensor0, input_atensor1);
-}
-
-TEST(cpp_operations_test, scale) {
-  vector<int> input_shape_data{2, 3, 4, 5};
-  vector<int> weights_shape_data{5};
-  vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
-  Tensor input_atensor0;
-  Tensor input_atensor1;
-  fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
-  fillTensors(input_ntensors[1], input_atensor1, weights_shape_data, 1.0f);
-
-  auto op_generator = [](mir::Graph& g, const std::vector<mir::Operation::Output*>& inputs) {
-    return g.create<mir::ops::ScaleOp>("y", inputs[0], inputs[1]);
-  };
-
-  createAndRunTestGraph(op_generator, scale, input_ntensors, input_atensor0, input_atensor1);
-}
-
 TEST(cpp_operations_test, capped_relu) {
   // test prerequisites
   // cap has this value to cut input numbers(they are in range [-1, 1])
-- 
2.7.4
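
Note on the semantics behind this patch. The deleted interpreter kernels (Bias.cpp, Scale.cpp) required a rank-1 second operand whose length equals the input's last dimension; that contract is exactly an elementwise Add/Mul with the rank-1 operand broadcast along the last axis, which is why the ops can be replaced, and why FuseArithmeticOps stays under `#if 0` in Driver.cpp until AddOp/MulOp broadcasting is supported. The sketch below is a standalone, compilable illustration of that equivalence and of the algebra sinkAddThroughMul relies on; it is not nnc code, and every name in it is hypothetical.

// Standalone illustration (hypothetical names, not part of the patch).
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Old BiasAdd semantics: out[..., c] = in[..., c] + bias[c], bias is rank-1.
std::vector<float> biasAdd(const std::vector<float>& input, const std::vector<float>& bias) {
  assert(input.size() % bias.size() == 0);
  std::vector<float> out(input.size());
  for (std::size_t i = 0; i < input.size(); ++i)
    out[i] = input[i] + bias[i % bias.size()]; // row-major: channel = index in last dimension
  return out;
}

// The elementwise replacement: Add with the rank-1 operand broadcast over the last axis.
std::vector<float> broadcastAdd(const std::vector<float>& input, const std::vector<float>& rhs) {
  std::vector<float> out(input.size());
  for (std::size_t i = 0; i < input.size(); ++i)
    out[i] = input[i] + rhs[i % rhs.size()];
  return out;
}

int main() {
  // A 2x3 tensor flattened row-major; per-channel bias and scale of length 3.
  const std::vector<float> x{1, 2, 3, 4, 5, 6};
  const std::vector<float> b{10, 20, 30};
  const std::vector<float> s{2, 3, 4};

  // BiasAdd and the broadcast Add agree elementwise.
  assert(biasAdd(x, b) == broadcastAdd(x, b));

  // sinkAddThroughMul relies on (x + b) * s == x * s + (b * s), so the Add's
  // constant can be pre-multiplied by the Mul's constant (mergeConstantOps
  // with OpType::mul) and the Add moved below the Mul.
  for (std::size_t i = 0; i < x.size(); ++i) {
    const float sunk = x[i] * s[i % 3] + b[i % 3] * s[i % 3];
    assert(std::fabs((x[i] + b[i % 3]) * s[i % 3] - sunk) < 1e-5f);
  }
  return 0;
}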