From 8a17ac1cdcf487a5b9a50a49718f4b412ce2d75f Mon Sep 17 00:00:00 2001
From: Ivan Vagin/AI Tools Lab /SRR/Engineer/Samsung Electronics
Date: Thu, 4 Jul 2019 12:59:38 +0300
Subject: [PATCH] [neurun] Applied shape fixer (#5549)

Applied ShapeFixer, removed ShapeFixer functionality from StageGenerator

Signed-off-by: Ivan Vagin
---
 runtimes/neurun/backend/acl_cl/StageGenerator.cc   | 123 ---------------------
 runtimes/neurun/backend/acl_neon/StageGenerator.cc |  33 ------
 runtimes/neurun/backend/cpu/StageGenerator.cc      |   7 --
 .../neurun/core/src/compiler/ExecutorFactory.cc    |  37 +++----
 runtimes/neurun/core/src/compiler/PlanBuilder.cc   |  19 ++--
 runtimes/neurun/core/src/compiler/PlanBuilder.h    |   3 +-
 6 files changed, 31 insertions(+), 191 deletions(-)

diff --git a/runtimes/neurun/backend/acl_cl/StageGenerator.cc b/runtimes/neurun/backend/acl_cl/StageGenerator.cc
index 7304c34..eb245a9 100644
--- a/runtimes/neurun/backend/acl_cl/StageGenerator.cc
+++ b/runtimes/neurun/backend/acl_cl/StageGenerator.cc
@@ -473,12 +473,8 @@ void StageGenerator::visit(const model::operation::ConcatNode &node)
   Param param;

   param.output_index = ofm_index;
-  _tensor_builder->dimCorrection(ofm_index, false);
   for (const auto &e : node.getInputs())
-  {
     param.input_indexes.emplace_back(e);
-    _tensor_builder->dimCorrection(e, false);
-  }
   param.axis = _ctx.at(axis_index).asScalar();

   auto tensors = _tensor_builder;
@@ -555,8 +551,6 @@ void StageGenerator::visit(const model::operation::FullyConnectedNode &node)
   UNUSED_RELEASE(feature_size);
   assert(feature_size == batch_size * input_size);

-  tensors->dimCorrection(input_index, false);
-
   // for reshaping
   needs_reshape = true;
   reshape.dim(0) = batch_size; /* H */
@@ -618,17 +612,6 @@ void StageGenerator::visit(const model::operation::MulNode &node)
   const auto lhs_index{node.getInputs().at(model::operation::MulNode::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::MulNode::Input::RHS)};

-  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
-    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
-    // a node to extend shape may be inserted in front of this operation
-    const_cast<::neurun::model::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
-  }
-
   struct Param
   {
     model::OperandIndex ofm_index;
@@ -764,9 +747,6 @@ void StageGenerator::visit(const model::operation::ReshapeNode &node)

   auto tensors = _tensor_builder;

-  tensors->dimCorrection(input_index, false);
-  tensors->dimCorrection(output_index, false);
-
   returnStage([tensors, param](IExecutionBuilder &builder) {
     auto output_alloc = tensors->at(param.output_index).get();
     auto input_alloc = tensors->at(param.input_index).get();
@@ -802,9 +782,6 @@ void StageGenerator::visit(const model::operation::SqueezeNode &node)
   Param param{output_index, input_index};
   auto tensors = _tensor_builder;

-  tensors->dimCorrection(input_index, false);
-  tensors->dimCorrection(output_index, false);
-
   returnStage([tensors, param](IExecutionBuilder &builder) {
     auto output_alloc = tensors->at(param.output_index).get();
     auto input_alloc = tensors->at(param.input_index).get();
@@ -1081,14 +1058,6 @@ void StageGenerator::visit(const model::operation::AddNode &node)
   const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)};

-  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-    const_cast<::neurun::model::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
-  }
-
   struct Param
   {
     model::OperandIndex ofm_index;
@@ -1136,17 +1105,6 @@ void StageGenerator::visit(const model::operation::SubNode &node)
   const auto lhs_index{node.getInputs().at(model::operation::SubNode::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::SubNode::Input::RHS)};

-  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
-    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
-    // a node to extend shape may be inserted in front of this operation
-    const_cast<::neurun::model::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
-  }
-
   struct Param
   {
     model::OperandIndex ofm_index;
@@ -1194,17 +1152,6 @@ void StageGenerator::visit(const model::operation::DivNode &node)
   const auto lhs_index{node.getInputs().at(model::operation::DivNode::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::DivNode::Input::RHS)};

-  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
-    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
-    // a node to extend shape may be inserted in front of this operation
-    const_cast<::neurun::model::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
-  }
-
   // Construct operation parameters
   struct Param
   {
@@ -1324,17 +1271,6 @@ void StageGenerator::visit(const model::operation::LogicalAndNode &node)
   const auto input0_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT0)};
   const auto input1_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT1)};

-  if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
-
-    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
-    // a node to extend shape may be inserted in front of this operation
-    const_cast<::neurun::model::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
-  }
-
   // Construct operation parameters
   struct Param
   {
@@ -1728,17 +1664,6 @@ void StageGenerator::visit(const model::operation::ComparisonNode &node)
   const auto input0_index{node.getInputs().at(model::operation::ComparisonNode::Input::INPUT0)};
   const auto input1_index{node.getInputs().at(model::operation::ComparisonNode::Input::INPUT1)};

-  if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
-
-    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
-    // a node to extend shape may be inserted in front of this operation
-    const_cast<::neurun::model::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
-  }
-
   // Construct operation parameters
   struct Param
   {
@@ -2094,9 +2019,6 @@ void StageGenerator::visit(const model::operation::SpaceToDepthNode &node)

   auto tensors = _tensor_builder;

-  tensors->dimCorrection(ofm_index, false);
-  tensors->dimCorrection(ifm_index, false);
-
   returnStage([tensors, param](IExecutionBuilder &builder) {
     auto ofm_alloc = tensors->at(param.ofm_index).get();
     auto ifm_alloc = tensors->at(param.ifm_index).get();
@@ -2199,9 +2121,6 @@ void StageGenerator::visit(const model::operation::EmbeddingLookupNode &node)

   auto tensors = _tensor_builder;

-  tensors->dimCorrection(values_index, false);
-  tensors->dimCorrection(output_index, false);
-
   returnStage([tensors, param](IExecutionBuilder &builder) {
     auto output_alloc = tensors->at(param.output_index).get();
     auto lookups_alloc = tensors->at(param.lookups_index).get();
@@ -2340,14 +2259,6 @@ void StageGenerator::visit(const model::operation::PReLUNode &node)
   const auto ifm_index{node.getInputs().at(model::operation::PReLUNode::Input::INPUT)};
   const auto alpha_index{node.getInputs().at(model::operation::PReLUNode::Input::ALPHA)};

-  if (!(_ctx.at(ifm_index).shape() == _ctx.at(alpha_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(ifm_index).shape().rank(), _ctx.at(alpha_index).shape().rank());
-    const_cast<::neurun::model::Shape &>(_ctx.at(ifm_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(alpha_index).shape()).extendRank(broadcast_rank);
-  }
-
   struct Param
   {
     model::OperandIndex ofm_index;
@@ -2501,14 +2412,6 @@ void StageGenerator::visit(const model::operation::LogicalOrNode &node)
   const auto input0_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT0)};
   const auto input1_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT1)};

-  if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
-    const_cast<::neurun::model::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
-  }
-
   // Construct operation parameters
   struct Param
   {
@@ -2587,14 +2490,6 @@ void StageGenerator::visit(const model::operation::SquaredDifferenceNode &node)
   const auto lhs_index{node.getInputs().at(model::operation::SquaredDifferenceNode::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::SquaredDifferenceNode::Input::RHS)};

-  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-    const_cast<::neurun::model::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
-  }
-
   // Construct operation parameters
   struct Param
   {
@@ -2719,10 +2614,6 @@ void StageGenerator::visit(const model::operation::GatherNode &node)

   auto tensors = _tensor_builder;

-  tensors->dimCorrection(ofm_index, false);
-  tensors->dimCorrection(ifm_index, false);
-  tensors->dimCorrection(indices_index, false);
-
   returnStage([tensors, param](IExecutionBuilder &builder) {
     auto ofm_alloc = tensors->at(param.ofm_index).get();
     auto ifm_alloc = tensors->at(param.ifm_index).get();
@@ -2852,9 +2743,6 @@ void StageGenerator::visit(const model::operation::ArgMaxNode &node)
   assert(axis_shape.rank() == 1);
   assert((ifm_shape.rank() - 1) == ofm_shape.rank());

-  _tensor_builder->dimCorrection(ofm_index, false);
-  _tensor_builder->dimCorrection(ifm_index, false);
-
   std::vector l_axis;
   const int axis_size = axis_shape.num_elements();
   auto axis_base = _ctx.at(axis_index).data().base();
@@ -3302,12 +3190,8 @@ void StageGenerator::visit(const model::operation::SplitNode &node)
   param.axis = acl_common::ToARMComputeAxis(ifm_rank, param.axis).value();
   param.ifm_rank = ifm_rank;

-  _tensor_builder->dimCorrection(input_index, false);
   for (const auto &e : node.getOutputs())
-  {
     param.output_indexes.emplace_back(e);
-    _tensor_builder->dimCorrection(e, false);
-  }

   auto tensors = _tensor_builder;
@@ -3365,12 +3249,8 @@ void StageGenerator::visit(const model::operation::UnpackNode &node)
   param.axis += input_rank;
   param.axis = acl_common::ToARMComputeAxis(input_rank, param.axis).value();

-  _tensor_builder->dimCorrection(input_index, false);
   for (const auto &output_index : node.getOutputs())
-  {
     param.output_indexes.emplace_back(output_index);
-    _tensor_builder->dimCorrection(output_index, false);
-  }

   auto tensors = _tensor_builder;
@@ -3437,9 +3317,6 @@ void StageGenerator::visit(const model::operation::PadNode &node)

   auto tensors = _tensor_builder;

-  _tensor_builder->dimCorrection(input_index, false);
-  _tensor_builder->dimCorrection(output_index, false);
-
   returnStage([tensors, param](IExecutionBuilder &builder) {
     auto input = tensors->at(param.input_index).get()->handle();
     auto output = tensors->at(param.output_index).get()->handle();
diff --git a/runtimes/neurun/backend/acl_neon/StageGenerator.cc b/runtimes/neurun/backend/acl_neon/StageGenerator.cc
index 181336c..0da3bdc 100644
--- a/runtimes/neurun/backend/acl_neon/StageGenerator.cc
+++ b/runtimes/neurun/backend/acl_neon/StageGenerator.cc
@@ -436,12 +436,8 @@ void StageGenerator::visit(const model::operation::ConcatNode &node)
   Param param;

   param.output_index = ofm_index;
-  _tensor_builder->dimCorrection(ofm_index, false);
   for (const auto &e : node.getInputs())
-  {
     param.input_indexes.emplace_back(e);
-    _tensor_builder->dimCorrection(e, false);
-  }
   param.axis = _ctx.at(axis_index).asScalar();

   auto tensors = _tensor_builder;
@@ -517,8 +513,6 @@ void StageGenerator::visit(const model::operation::FullyConnectedNode &node)
   UNUSED_RELEASE(feature_size);
   assert(feature_size == batch_size * input_size);

-  tensors->dimCorrection(input_index, false);
-
   // for reshaping
   needs_reshape = true;
   reshape.dim(0) = batch_size; /* H */
@@ -580,17 +574,6 @@ void StageGenerator::visit(const model::operation::MulNode &node)
   const auto lhs_index{node.getInputs().at(model::operation::MulNode::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::MulNode::Input::RHS)};

-  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-    auto lhs_shape = _ctx.at(lhs_index).shape();
-    auto rhs_shape = _ctx.at(rhs_index).shape();
-
-    lhs_shape.extendRank(broadcast_rank);
-    rhs_shape.extendRank(broadcast_rank);
-  }
-
   struct Param
   {
     model::OperandIndex ofm_index;
@@ -602,11 +585,6 @@
   // TODO: fix, tests are failing
   throw std::runtime_error("NYI");
-  // Nontrivial broadcasting isn't supported yet
-  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
-  {
-    throw std::runtime_error("NYI");
-  }

   Param param;
@@ -659,9 +637,6 @@ void StageGenerator::visit(const model::operation::ReshapeNode &node)

   auto tensors = _tensor_builder;

-  tensors->dimCorrection(input_index, false);
-  tensors->dimCorrection(output_index, false);
-
   returnStage([tensors, param](IExecutionBuilder &builder) {
     auto output_alloc = tensors->at(param.output_index).get();
     auto input_alloc = tensors->at(param.input_index).get();
@@ -751,14 +726,6 @@ void StageGenerator::visit(const model::operation::AddNode &node)
   const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)};

-  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
-  {
-    const auto broadcast_rank =
-        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-    const_cast<::neurun::model::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
-    const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
-  }
-
   struct Param
   {
     model::OperandIndex ofm_index;
diff --git a/runtimes/neurun/backend/cpu/StageGenerator.cc b/runtimes/neurun/backend/cpu/StageGenerator.cc
index c89c669..6612638 100644
--- a/runtimes/neurun/backend/cpu/StageGenerator.cc
+++ b/runtimes/neurun/backend/cpu/StageGenerator.cc
@@ -506,13 +506,6 @@ void StageGenerator::visit(const model::operation::AddNode &node)
   const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)};

-  // Broadcasting and quantization
-  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()) ||
-      _ctx.at(lhs_index).typeInfo().type() == model::DataType::QUANT8_ASYMM)
-  {
-    throw std::runtime_error{"NYI"};
-  }
-
   struct Param
   {
     model::OperandIndex ofm_index;
diff --git a/runtimes/neurun/core/src/compiler/ExecutorFactory.cc b/runtimes/neurun/core/src/compiler/ExecutorFactory.cc
index d0b1623..2b30ca8 100644
--- a/runtimes/neurun/core/src/compiler/ExecutorFactory.cc
+++ b/runtimes/neurun/core/src/compiler/ExecutorFactory.cc
@@ -28,6 +28,7 @@
 #include "OperationValidator.h"
 #include "SubTensorAnalyzer.h"
 #include "PlanBuilder.h"
+#include "backend/IShapeFixer.h"
 #include "ConstantInitializer.h"

 #include "cpp14/memory.h"
@@ -93,19 +94,17 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor(graph::Graph &graph)

   PlanBuilder plan_builder{*operand_context, *operation_sequence};

-  // Plan building
+  // Fix shapes
   linear->iterate([&](const linear::Element &element) {
     auto backend = element.lower_info->backend();
-
-    // Generate Stage
-    auto stage_gen = backend->stage_gen();
-    plan_builder.addStage(stage_gen->generate(*element.subgraph));
+    auto shape_fixer = backend->shape_fixer();
+    shape_fixer->fix(*element.subgraph);
   });

   auto tensor_builders = linear->planTensors();

   // TODO Add optimization passes
-  plan_builder.finalize(tensor_builders);
+  plan_builder.finalize(linear.get(), tensor_builders);

   ConstantInitializer{graph, *operand_context, *linear->getLowerInfo()}();
@@ -136,13 +135,12 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(graph::Graph &graph, bo
         subg.accept(subtensor_analyzer);
       });

+  // Fix shapes
   graph.subg_ctx().iterate(
       [&](const model::SubgraphIndex &subg_index, const model::Subgraph &subg) {
         auto backend = graph.getLowerInfo(subg_index)->backend();
-
-        // Generate Stage
-        auto stage_gen = backend->stage_gen();
-        stages[subg_index] = stage_gen->generate(subg);
+        auto shape_fixer = backend->shape_fixer();
+        shape_fixer->fix(subg);
       });

   backend::TensorBuilderSet tensor_builders;
@@ -193,9 +191,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(graph::Graph &graph, bo
     });
   }

-  // TODO Extract this to another class
-  // IExecutionBuilder should be moved to `compiler/IExecutionBuilder.h` from
-  // `backend/IStageGenerator.h`.
+  // TODO Extract this to another file
   class ExecutionBuilder : public IExecutionBuilder
   {
   public:
@@ -221,12 +217,15 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(graph::Graph &graph, bo

   ExecutionBuilder execution_builder;

-  for (auto &&itr : stages)
-  {
-    // TODO This approach is temporal. See declaration of `setNextIndex`.
-    execution_builder.setNextIndex(itr.first);
-    (*itr.second)(execution_builder);
-  }
+  // Generate and process stages
+  graph.subg_ctx().iterate(
+      [&](const model::SubgraphIndex &subg_index, const model::Subgraph &subg) {
+        auto backend = graph.getLowerInfo(subg_index)->backend();
+        auto stage_gen = backend->stage_gen();
+        // TODO This approach is temporal. See declaration of `setNextIndex`.
+        execution_builder.setNextIndex(subg_index);
+        (*stage_gen->generate(subg))(execution_builder);
+      });

   for (const auto &tensor_builder : tensor_builders)
   {
diff --git a/runtimes/neurun/core/src/compiler/PlanBuilder.cc b/runtimes/neurun/core/src/compiler/PlanBuilder.cc
index dbe8b27..691b41a 100644
--- a/runtimes/neurun/core/src/compiler/PlanBuilder.cc
+++ b/runtimes/neurun/core/src/compiler/PlanBuilder.cc
@@ -17,6 +17,8 @@
 #include "PlanBuilder.h"

 #include "backend/operand/IObject.h"
+#include "linear/Linear.h"
+#include "backend/Backend.h"

 namespace neurun
 {
@@ -28,7 +30,8 @@ void PlanBuilder::addStage(std::unique_ptr stage)
 {
   _stages.emplace_back(std::move(stage));
 }

-void PlanBuilder::finalize(const backend::TensorBuilderSet &tensor_builders)
+void PlanBuilder::finalize(const linear::Linear *linear,
+                           const backend::TensorBuilderSet &tensor_builders)
 {
@@ -42,13 +45,13 @@ void PlanBuilder::finalize(const backend::TensorBuilderSet &tensor_builders)
     });
   }

-  // Process Stage
+  // Generate and process stages
   ExecutionBuilder execution_builder{_operations};
-
-  for (const auto &stage : _stages)
-  {
-    (*stage)(execution_builder);
-  }
+  linear->iterate([&](const linear::Element &element) {
+    auto backend = element.lower_info->backend();
+    auto stage_gen = backend->stage_gen();
+    (*stage_gen->generate(*element.subgraph))(execution_builder);
+  });

   // Allocate Tensor Memory for cl_tensors
   for (auto &tensor_builder : tensor_builders)
   {
@@ -57,5 +60,5 @@
   }
 }

-} // namepsace compiler
+} // namespace compiler
 } // namespace neurun
diff --git a/runtimes/neurun/core/src/compiler/PlanBuilder.h b/runtimes/neurun/core/src/compiler/PlanBuilder.h
index 58fde49..7d62b89 100644
--- a/runtimes/neurun/core/src/compiler/PlanBuilder.h
+++ b/runtimes/neurun/core/src/compiler/PlanBuilder.h
@@ -23,6 +23,7 @@
 #include "backend/IStageGenerator.h"
 #include "backend/ITensorBuilder.h"
 #include "backend/IStage.h"
+#include "linear/Linear.h"

 namespace neurun
 {
@@ -61,7 +62,7 @@ public:
 public:
   // TODO Remove the argument `tensor_builders`
-  void finalize(const backend::TensorBuilderSet &tensor_builders);
+  void finalize(const linear::Linear *linear, const backend::TensorBuilderSet &tensor_builders);

 private:
   OperandContext &_operands;
-- 
2.7.4
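
Note: the rank-extension blocks deleted above now run as a dedicated pass behind each
backend's shape_fixer() entry point, before any stage is generated. The snippet below is
a minimal self-contained sketch of that idea only -- the Shape type and the
BroadcastShapeFixer class are simplified stand-ins for illustration, not the actual
neurun model::Shape / backend::IShapeFixer API:

    // shape_fixer_sketch.cc -- illustrative only; types are simplified stand-ins.
    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Stand-in for neurun's model::Shape (assumed interface).
    struct Shape
    {
      std::vector<int> dims;

      int rank() const { return static_cast<int>(dims.size()); }

      // Prepend size-1 dimensions until the shape reaches `to_rank`;
      // this is how broadcast operands are rank-aligned.
      void extendRank(int to_rank)
      {
        assert(to_rank >= rank());
        dims.insert(dims.begin(), to_rank - rank(), 1);
      }
    };

    // A pass in the spirit of backend::IShapeFixer: mutate operand shapes
    // before stage generation, so the stage generator itself can keep a
    // const view of the operand context (no const_cast needed).
    struct BroadcastShapeFixer
    {
      // Mirrors the blocks removed from visit(AddNode)/visit(MulNode)/...
      void fix(Shape &lhs, Shape &rhs) const
      {
        if (lhs.dims == rhs.dims)
          return; // equal shapes need no broadcasting
        const int broadcast_rank = std::max(lhs.rank(), rhs.rank());
        lhs.extendRank(broadcast_rank);
        rhs.extendRank(broadcast_rank);
      }
    };

    int main()
    {
      Shape lhs{{2, 3, 4}};
      Shape rhs{{4}}; // lower-rank operand to be broadcast

      BroadcastShapeFixer{}.fix(lhs, rhs);

      assert(lhs.rank() == 3 && rhs.rank() == 3);
      assert(rhs.dims == (std::vector<int>{1, 1, 4}));
      return 0;
    }

Running the fix as its own pass gives createLinearExecutor and createDataflowExecutor a
single shared mutation point ahead of planTensors() and stage generation, which is why
the per-visit() copies could be dropped from all three StageGenerators.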