From b3c78cb5f98801d93d944e5170583ac2fe59df0a Mon Sep 17 00:00:00 2001
From: Sergei Barannikov/AI Tools Lab /SRR/Engineer/Samsung Electronics
Date: Mon, 3 Dec 2018 14:05:51 +0300
Subject: [PATCH] [nnc] Implementation of Transpose operation (#2464)

* Add `Transpose` operation to ModelIR;
* Support `Transpose` operation in interpreter and soft backend.

Signed-off-by: Sergei Barannikov
---
 contrib/nnc/core/CMakeLists.txt                    |  1 +
 contrib/nnc/core/modelIR/IrDotDumper.cpp           |  8 +++
 contrib/nnc/core/modelIR/Operation.cpp             |  1 +
 .../nnc/core/modelIR/operations/TransposeOp.cpp    | 40 +++++++++++++
 contrib/nnc/include/core/modelIR/IrDotDumper.h     |  2 +
 .../include/core/modelIR/operations/ReduceFOp.h    |  2 +-
 .../include/core/modelIR/operations/TransposeOp.h  | 43 ++++++++++++++
 .../core/modelIR/operations/operations.lst.h       |  3 +-
 .../passes/acl_soft_backend/AclCppOpGenerator.h    |  1 +
 .../nnc/include/passes/interpreter/Interpreter.h   |  1 +
 .../passes/acl_soft_backend/AclCppOpGenerator.cpp  |  4 ++
 contrib/nnc/passes/interpreter/Interpreter.cpp     |  9 +++
 contrib/nnc/passes/interpreter/ops/Transpose.cpp   | 49 ++++++++++++++++
 contrib/nnc/passes/interpreter/ops/Transpose.h     | 38 +++++++++++++
 contrib/nnc/passes/soft_backend/CPPGenerator.cpp   |  2 +
 contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp  |  5 ++
 contrib/nnc/passes/soft_backend/ModelAnalyzer.h    |  1 +
 contrib/nnc/passes/soft_backend/SBSerializer.cpp   | 13 +++++
 contrib/nnc/passes/soft_backend/SBSerializer.h     |  1 +
 .../code_snippets/cpp_common_funcs.def             |  5 ++
 .../soft_backend/code_snippets/cpp_operations.def  | 15 +++++
 .../soft_backend/code_snippets/cpp_transpose.def   | 65 ++++++++++++++++++++++
 .../nnc/unittests/soft_backend/CPPOperations.cpp   |  1 +
 23 files changed, 308 insertions(+), 2 deletions(-)
 create mode 100644 contrib/nnc/core/modelIR/operations/TransposeOp.cpp
 create mode 100644 contrib/nnc/include/core/modelIR/operations/TransposeOp.h
 create mode 100644 contrib/nnc/passes/interpreter/ops/Transpose.cpp
 create mode 100644 contrib/nnc/passes/interpreter/ops/Transpose.h
 create mode 100644 contrib/nnc/passes/soft_backend/code_snippets/cpp_transpose.def

diff --git a/contrib/nnc/core/CMakeLists.txt b/contrib/nnc/core/CMakeLists.txt
index 598092d..cc4f450 100644
--- a/contrib/nnc/core/CMakeLists.txt
+++ b/contrib/nnc/core/CMakeLists.txt
@@ -6,6 +6,7 @@ set(SOURCES "modelIR/operations/ConcatOp.cpp"
             "modelIR/operations/PadOp.cpp"
             "modelIR/operations/PoolOp.cpp"
             "modelIR/operations/SqueezeOp.cpp"
+            "modelIR/operations/TransposeOp.cpp"
             "modelIR/Graph.cpp"
             "modelIR/Index.cpp"
             "modelIR/ir_dot_builder.cpp"
diff --git a/contrib/nnc/core/modelIR/IrDotDumper.cpp b/contrib/nnc/core/modelIR/IrDotDumper.cpp
index 558dc05..5e09f69 100644
--- a/contrib/nnc/core/modelIR/IrDotDumper.cpp
+++ b/contrib/nnc/core/modelIR/IrDotDumper.cpp
@@ -253,6 +253,14 @@ void IrDotDumper::visit(ops::ResizeOp& op) {
   dotBuilder.updateWithOp(&op, node_info);
 }
 
+void IrDotDumper::visit(ops::TransposeOp& op) {
+  auto node_info = DotIrNodeInfo().withType("TransposeOp", op.getName())
+                                  .withInShapes(getInputShapes(op))
+                                  .withOutShapes(getOutputShapes(op));
+
+  dotBuilder.updateWithOp(&op, node_info);
+}
+
 } // namespace mir
 } // namespace nnc
 
diff --git a/contrib/nnc/core/modelIR/Operation.cpp b/contrib/nnc/core/modelIR/Operation.cpp
index 90598fb..4952d32 100644
--- a/contrib/nnc/core/modelIR/Operation.cpp
+++ b/contrib/nnc/core/modelIR/Operation.cpp
@@ -38,6 +38,7 @@
#include "core/modelIR/operations/ReshapeOp.h" #include "core/modelIR/operations/PadOp.h" #include "core/modelIR/operations/ReduceFOp.h" +#include "core/modelIR/operations/TransposeOp.h" namespace nnc { namespace mir { diff --git a/contrib/nnc/core/modelIR/operations/TransposeOp.cpp b/contrib/nnc/core/modelIR/operations/TransposeOp.cpp new file mode 100644 index 0000000..663b534 --- /dev/null +++ b/contrib/nnc/core/modelIR/operations/TransposeOp.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "core/modelIR/operations/TransposeOp.h" + +namespace nnc { +namespace mir { +namespace ops { + +TransposeOp::TransposeOp(const IODescriptor& arg, const std::vector& axis_order) + : Operation(Type::transpose, {arg}), _axisOrder(axis_order) { + assert(_axisOrder.size() == static_cast(getInputShape(0).rank())); + inferOutputShapes(); +} + +void TransposeOp::inferOutputShapes() { + auto& input_shape = getInputShape(0); + Shape output_shape; + output_shape.resize(input_shape.rank()); + for (std::size_t i = 0; i < _axisOrder.size(); ++i) + output_shape.dim(i) = input_shape.dim(static_cast(_axisOrder.at(i))); + setOutputShape(0, output_shape); +} + +} // namespace ops +} // namespace mir +} // namespace nnc diff --git a/contrib/nnc/include/core/modelIR/IrDotDumper.h b/contrib/nnc/include/core/modelIR/IrDotDumper.h index 2ab5c3b..cc54ab3 100644 --- a/contrib/nnc/include/core/modelIR/IrDotDumper.h +++ b/contrib/nnc/include/core/modelIR/IrDotDumper.h @@ -41,6 +41,7 @@ #include "core/modelIR/operations/SqueezeOp.h" #include "core/modelIR/operations/PadOp.h" #include "core/modelIR/operations/ReduceFOp.h" +#include "core/modelIR/operations/TransposeOp.h" #include "core/modelIR/ir_dot_builder.h" @@ -78,6 +79,7 @@ public: void visit(ops::SqueezeOp& op) override; void visit(ops::PadOp& op) override; void visit(ops::ReduceFOp& op) override; + void visit(ops::TransposeOp& op) override; void writeDot(std::ostream &os) { dotBuilder.writeDot(os); }; diff --git a/contrib/nnc/include/core/modelIR/operations/ReduceFOp.h b/contrib/nnc/include/core/modelIR/operations/ReduceFOp.h index 2c87673..e247434 100644 --- a/contrib/nnc/include/core/modelIR/operations/ReduceFOp.h +++ b/contrib/nnc/include/core/modelIR/operations/ReduceFOp.h @@ -40,7 +40,7 @@ public: const std::vector& reduce_dims, bool keep_dims, FuncType func_type) - : Operation(Type::reduceFOp, {arg}), _reduceDims(reduce_dims), _keepDims(keep_dims), + : Operation(Type::reduceF, {arg}), _reduceDims(reduce_dims), _keepDims(keep_dims), _funcType(func_type) { // Infer output shapes. const auto& input_shape = getInputShape(0); diff --git a/contrib/nnc/include/core/modelIR/operations/TransposeOp.h b/contrib/nnc/include/core/modelIR/operations/TransposeOp.h new file mode 100644 index 0000000..31f8d58 --- /dev/null +++ b/contrib/nnc/include/core/modelIR/operations/TransposeOp.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _NNC_CORE_IR_MODEL_TRANSPOSE_H_ +#define _NNC_CORE_IR_MODEL_TRANSPOSE_H_ + +#include "core/modelIR/Operation.h" +#include + +namespace nnc { +namespace mir { +namespace ops { + +class TransposeOp : public Operation { +public: + TransposeOp(const IODescriptor& arg, const std::vector& axis_order); + + const std::vector& getAxisOrder() const { return _axisOrder; } + +private: + void inferOutputShapes(); + + std::vector _axisOrder; +}; + +} // namespace ops +} // namespace mir +} // namespace nnc + +#endif //_NNC_CORE_IR_MODEL_TRANSPOSE_H_ diff --git a/contrib/nnc/include/core/modelIR/operations/operations.lst.h b/contrib/nnc/include/core/modelIR/operations/operations.lst.h index c09a1f5..040070b 100644 --- a/contrib/nnc/include/core/modelIR/operations/operations.lst.h +++ b/contrib/nnc/include/core/modelIR/operations/operations.lst.h @@ -40,4 +40,5 @@ HANDLE_OP(deConv2D, DeConv2DOp) HANDLE_OP(ELU, EluOp) HANDLE_OP(squeeze, SqueezeOp) HANDLE_OP(pad, PadOp) -HANDLE_OP(reduceFOp, ReduceFOp) +HANDLE_OP(reduceF, ReduceFOp) +HANDLE_OP(transpose, TransposeOp) diff --git a/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h b/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h index 86b2220..077049b 100644 --- a/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h +++ b/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h @@ -70,6 +70,7 @@ public: void visit(mir::ops::SqueezeOp& op) override; void visit(mir::ops::PadOp& op) override; void visit(mir::ops::ReduceFOp& op) override; + void visit(mir::ops::TransposeOp& op) override; private: using AF = ArtifactFactory; diff --git a/contrib/nnc/include/passes/interpreter/Interpreter.h b/contrib/nnc/include/passes/interpreter/Interpreter.h index ccdb301..ce4d938 100644 --- a/contrib/nnc/include/passes/interpreter/Interpreter.h +++ b/contrib/nnc/include/passes/interpreter/Interpreter.h @@ -59,6 +59,7 @@ public: void visit(ops::SqueezeOp& op) override; void visit(ops::PadOp& op) override; void visit(ops::ReduceFOp& op) override; + void visit(ops::TransposeOp& op) override; void setInput(const std::string &name, const TensorVariant& data); std::vector &getResult(Operation* op); diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp index c04d3c1..3546c32 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp @@ -922,5 +922,9 @@ void AclCppOpGenerator::visit(mir::ops::ReduceFOp& op) { assert(false && "Unimplemented operation: ReduceFOp"); } +void AclCppOpGenerator::visit(mir::ops::TransposeOp& op) { + assert(false && "Unimplemented operation: TransposeOp"); +} + } // namespace nnc diff --git a/contrib/nnc/passes/interpreter/Interpreter.cpp b/contrib/nnc/passes/interpreter/Interpreter.cpp index 4b7e6ff..9938357 100644 --- a/contrib/nnc/passes/interpreter/Interpreter.cpp +++ 
b/contrib/nnc/passes/interpreter/Interpreter.cpp @@ -43,6 +43,7 @@ #include "core/modelIR/operations/ElementwiseOp.h" #include "core/modelIR/operations/SqueezeOp.h" #include "core/modelIR/operations/PadOp.h" +#include "core/modelIR/operations/TransposeOp.h" #include "ops/Bias.h" #include "ops/Concat.h" @@ -54,6 +55,7 @@ #include "ops/Reshape.h" #include "ops/Softmax.h" #include "ops/Scale.h" +#include "ops/Transpose.h" #include "ops/Dropout.h" #include "ops/BatchNorm.h" #include "ops/Pad.h" @@ -339,4 +341,11 @@ void NNInterpreter::visit(ops::ReduceFOp& op) { } } +void NNInterpreter::visit(ops::TransposeOp& op) { + mapByName(&op); + auto operand = op.getPrevNodes()[0]; + auto& input = var(operand.op->getId())[operand.index]; + var(op.getId()) = Transpose(input, op)(); +} + } // namespace nnc diff --git a/contrib/nnc/passes/interpreter/ops/Transpose.cpp b/contrib/nnc/passes/interpreter/ops/Transpose.cpp new file mode 100644 index 0000000..fba948d --- /dev/null +++ b/contrib/nnc/passes/interpreter/ops/Transpose.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Transpose.h" +#include "core/modelIR/Tensor.h" +#include "core/modelIR/ShapeRange.h" + +namespace nnc { + +using namespace mir; + +Transpose::Transpose(const mir::TensorVariant& input, + const mir::ops::TransposeOp& op) : _op(op), _input(input) {} + +std::vector Transpose::operator()() { + auto res = allocate_tensor(_op.getOutputShape(0)); + Tensor res_accessor(res); + + auto& input_shape = _op.getInputShape(0); + auto& axis_order = _op.getAxisOrder(); + std::size_t num_axes = axis_order.size(); + + ShapeRange in_range(input_shape); + Index out_index; + out_index.resize(input_shape.rank()); + + for (auto& in_index : in_range) { + for (std::size_t i = 0; i < num_axes; ++i) + out_index.at(static_cast(i)) = in_index.at(static_cast(axis_order.at(i))); + res_accessor.at(out_index) = _input.at(in_index); + } + + return {res}; +} + +} diff --git a/contrib/nnc/passes/interpreter/ops/Transpose.h b/contrib/nnc/passes/interpreter/ops/Transpose.h new file mode 100644 index 0000000..97879aa --- /dev/null +++ b/contrib/nnc/passes/interpreter/ops/Transpose.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_TRANSPOSE_
+#define _NNC_CORE_BACKEND_INTERPRETER_TRANSPOSE_
+
+#include "OperationImpl.h"
+#include "core/modelIR/operations/TransposeOp.h"
+
+namespace nnc {
+
+class Transpose : public OperationImpl<float> {
+public:
+  std::vector<mir::TensorVariant> operator()() override;
+
+  Transpose(const mir::TensorVariant& input, const mir::ops::TransposeOp& op);
+
+private:
+  const mir::ops::TransposeOp& _op;
+  const mir::Tensor<float> _input;
+};
+
+}
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_TRANSPOSE_
diff --git a/contrib/nnc/passes/soft_backend/CPPGenerator.cpp b/contrib/nnc/passes/soft_backend/CPPGenerator.cpp
index 30565ad..2ed4343 100644
--- a/contrib/nnc/passes/soft_backend/CPPGenerator.cpp
+++ b/contrib/nnc/passes/soft_backend/CPPGenerator.cpp
@@ -45,6 +45,7 @@ using namespace std;
 #include "cpp_tanh.generated.h"
 #include "cpp_elementwise.generated.h"
 #include "cpp_pad.generated.h"
+#include "cpp_transpose.generated.h"
 
 namespace nnc {
 
@@ -287,6 +288,7 @@ void CPPCodeGenerator::materializeCode(ostream &out, const ModelAnalyzer &ma, co
   out.write(cpp_tanh, sizeof(cpp_tanh));
   out.write(cpp_pad, sizeof(cpp_pad));
   out.write(cpp_conv_transpose, sizeof(cpp_conv_transpose));
+  out.write(cpp_transpose, sizeof(cpp_transpose));
   out.write(cpp_operations, sizeof(cpp_operations));
   out.write(cpp_scale, sizeof(cpp_scale));
   out.write(cpp_dropout, sizeof(cpp_dropout));
diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
index d3e459e..ddb73b5 100644
--- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
+++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
@@ -46,6 +46,7 @@
 #include "core/modelIR/operations/SqueezeOp.h"
 #include "core/modelIR/operations/PadOp.h"
 #include "core/modelIR/operations/ReduceFOp.h"
+#include "core/modelIR/operations/TransposeOp.h"
 
 using namespace std;
 
@@ -293,4 +294,8 @@ void ModelAnalyzer::visit(mir::ops::ReduceFOp& op) {
   addOpDescr(&op, "ReduceMean");
 }
 
+void ModelAnalyzer::visit(mir::ops::TransposeOp& op) {
+  addOpDescr(&op, "transpose");
+}
+
 } // namespace nnc
diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.h b/contrib/nnc/passes/soft_backend/ModelAnalyzer.h
index fb1fcf3..0b16711 100644
--- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.h
+++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.h
@@ -112,6 +112,7 @@ public:
   void visit(mir::ops::SqueezeOp& op) override;
   void visit(mir::ops::PadOp& op) override;
   void visit(mir::ops::ReduceFOp& op) override;
+  void visit(mir::ops::TransposeOp& op) override;
 
   /**
    * @return vector of id's of network input tensors
diff --git a/contrib/nnc/passes/soft_backend/SBSerializer.cpp b/contrib/nnc/passes/soft_backend/SBSerializer.cpp
index dba8621..d167924 100644
--- a/contrib/nnc/passes/soft_backend/SBSerializer.cpp
+++ b/contrib/nnc/passes/soft_backend/SBSerializer.cpp
@@ -42,6 +42,7 @@
 #include "core/modelIR/operations/SqueezeOp.h"
 #include "core/modelIR/operations/PadOp.h"
 #include "core/modelIR/operations/ReduceFOp.h"
+#include "core/modelIR/operations/TransposeOp.h"
 
 #include "pass/PassException.h"
 #include
@@ -355,4 +356,16 @@ void Serializer::visit(mir::ops::ReduceFOp& op) {
   serializeShape(op.getOutputShape(0));
 }
 
+void Serializer::visit(mir::ops::TransposeOp& op) {
+  _curOp->_paramStartOffset = _buffer.size();
+  // serialize parameters
+  auto& axis_order = op.getAxisOrder();
+  serializeT<int32_t>(static_cast<int32_t>(axis_order.size()));
+  for (std::size_t i = 0; i < axis_order.size(); ++i) {
+    serializeT<int32_t>(static_cast<int32_t>(axis_order.at(i)));
+  }
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+}
+
 } // namespace nnc
diff --git a/contrib/nnc/passes/soft_backend/SBSerializer.h b/contrib/nnc/passes/soft_backend/SBSerializer.h
index 6d01a27..dcac3cb 100644
--- a/contrib/nnc/passes/soft_backend/SBSerializer.h
+++ b/contrib/nnc/passes/soft_backend/SBSerializer.h
@@ -64,6 +64,7 @@ public:
   void visit(mir::ops::SqueezeOp& op) override;
   void visit(mir::ops::PadOp& op) override;
   void visit(mir::ops::ReduceFOp& op) override;
+  void visit(mir::ops::TransposeOp& op) override;
 
   void serialize(std::list<OpDescr> &inferenceSequence);
 
diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def
index 2f4a271..24f95c3 100644
--- a/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def
+++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def
@@ -526,3 +526,8 @@ inline int Offset(const Dims<4>& dims, int* index) {
 inline int Offset(const RuntimeShape& shape, int* index) {
   return Offset(shape, index[0], index[1], index[2], index[3]);
 }
+
+struct TransposeParams {
+  int8 perm_count;
+  int32 perm[4];
+};
diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def
index e78dc97..c9d7d5a 100644
--- a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def
+++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def
@@ -536,3 +536,18 @@ void pad(Tensor& out, const char* params, const Tensor& in) {
 
   Pad(input, input_dims, left_paddings, right_paddings, output, output_dims);
 }
+
+void transpose(Tensor &out, const char *params, const Tensor &in) {
+  TransposeParams transpose_params;
+  transpose_params.perm_count = deserializeT<int32_t>(params);
+  for (int i = 0; i < transpose_params.perm_count; ++i)
+    transpose_params.perm[i] = deserializeT<int32_t>(params);
+
+  Shape out_s = deserializeShape(params);
+  assert(out_s.getNumElems() == in.getShape().getNumElems());
+  out.reShape(out_s);
+
+  Transpose(transpose_params,
+            shapeToRuntimeShape(in.getShape()), in.getData(),
+            shapeToRuntimeShape(out.getShape()), out.getData());
+}
diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_transpose.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_transpose.def
new file mode 100644
index 0000000..30bd4df
--- /dev/null
+++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_transpose.def
@@ -0,0 +1,65 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+template <typename T>
+void Transpose(const TransposeParams& params,
+               const RuntimeShape& unextended_input_shape, const T* input_data,
+               const RuntimeShape& unextended_output_shape, T* output_data) {
+  const int unextended_output_size = unextended_output_shape.DimensionsCount();
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_size, 4);
+  TFLITE_DCHECK_EQ(unextended_output_size, params.perm_count);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  const int input_ext_size = 4 - unextended_input_shape.DimensionsCount();
+  const int output_ext_size = 4 - unextended_output_size;
+
+  // The perm data is extended to match the output, each index incremented by
+  // the amount of front padding of the input shape.
+  int extended_perm[4];
+  for (int i = 0; i < output_ext_size; ++i) {
+    extended_perm[i] = i;
+  }
+  for (int i = 0; i < unextended_output_size; ++i) {
+    extended_perm[i + output_ext_size] = params.perm[i] + input_ext_size;
+  }
+
+  int out_sizes[4];
+  // Compute the inverse permutation array so we can do an output centered
+  // transpose. Also, check to make sure output_dims is matching input_dims.
+  for (int k = 0; k < 4; k++) {
+    out_sizes[k] = MatchingDim(input_shape, extended_perm[k], output_shape, k);
+  }
+
+  // Naive transpose loop (iterate on output index and compute input index).
+  int o[4];  // loop index (on output).
+  int i[4];
+  for (o[3] = 0; o[3] < out_sizes[3]; o[3]++) {
+    i[extended_perm[3]] = o[3];
+    for (o[2] = 0; o[2] < out_sizes[2]; o[2]++) {
+      i[extended_perm[2]] = o[2];
+      for (o[1] = 0; o[1] < out_sizes[1]; o[1]++) {
+        i[extended_perm[1]] = o[1];
+        for (o[0] = 0; o[0] < out_sizes[0]; o[0]++) {
+          i[extended_perm[0]] = o[0];
+          output_data[Offset(output_shape, o)] =
+              input_data[Offset(input_shape, i)];
+        }
+      }
+    }
+  }
+}
diff --git a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
index 31c30aa..31e649c 100644
--- a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
+++ b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
@@ -40,6 +40,7 @@
 #include "code_snippets/cpp_elementwise.def"
 #include "code_snippets/cpp_tanh.def"
 #include "code_snippets/cpp_pad.def"
+#include "code_snippets/cpp_transpose.def"
 
 #include "CommonData.def"
 #include "code_snippets/cpp_header_types.def"
-- 
2.7.4
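
Note for reviewers: the shape-inference rule added in TransposeOp::inferOutputShapes is simply
"output dimension i takes the size of input dimension axis_order[i]"; the interpreter op and the
generated soft-backend kernel then move each element from its input index to the permuted output
index. Below is a minimal standalone sketch of that rule in plain C++. The Shape alias and the
inferTransposedShape helper are illustrative stand-ins only, not part of the nnc ModelIR API
touched by this patch.

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in for a tensor shape: just a list of dimension sizes.
using Shape = std::vector<int>;

// Mirrors TransposeOp::inferOutputShapes from the patch:
// output dimension i takes the size of input dimension axis_order[i].
Shape inferTransposedShape(const Shape& input, const std::vector<std::size_t>& axis_order) {
  assert(axis_order.size() == input.size());
  Shape output(input.size());
  for (std::size_t i = 0; i < axis_order.size(); ++i)
    output[i] = input[axis_order[i]];
  return output;
}

int main() {
  // NHWC -> NCHW permutation: {1, 224, 224, 3} with axis order {0, 3, 1, 2}.
  Shape in{1, 224, 224, 3};
  Shape out = inferTransposedShape(in, {0, 3, 1, 2});
  for (int d : out)
    std::cout << d << ' ';  // prints: 1 3 224 224
  std::cout << '\n';
  return 0;
}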