From b3c78cb5f98801d93d944e5170583ac2fe59df0a Mon Sep 17 00:00:00 2001
From: Sergei Barannikov/AI Tools Lab /SRR/Engineer/Samsung Electronics
Date: Mon, 3 Dec 2018 14:05:51 +0300
Subject: [PATCH] [nnc] Implementation of Transpose operation (#2464)

* Add `Transpose` operation to ModelIR;
* Support `Transpose` operation in interpreter and soft backend.

Signed-off-by: Sergei Barannikov
---
 contrib/nnc/core/CMakeLists.txt                    |  1 +
 contrib/nnc/core/modelIR/IrDotDumper.cpp           |  8 +++
 contrib/nnc/core/modelIR/Operation.cpp             |  1 +
 .../nnc/core/modelIR/operations/TransposeOp.cpp    | 40 +++++++++++++
 contrib/nnc/include/core/modelIR/IrDotDumper.h     |  2 +
 .../include/core/modelIR/operations/ReduceFOp.h    |  2 +-
 .../include/core/modelIR/operations/TransposeOp.h  | 43 ++++++++++++++
 .../core/modelIR/operations/operations.lst.h       |  3 +-
 .../passes/acl_soft_backend/AclCppOpGenerator.h    |  1 +
 .../nnc/include/passes/interpreter/Interpreter.h   |  1 +
 .../passes/acl_soft_backend/AclCppOpGenerator.cpp  |  4 ++
 contrib/nnc/passes/interpreter/Interpreter.cpp     |  9 +++
 contrib/nnc/passes/interpreter/ops/Transpose.cpp   | 49 ++++++++++++++++
 contrib/nnc/passes/interpreter/ops/Transpose.h     | 38 +++++++++++++
 contrib/nnc/passes/soft_backend/CPPGenerator.cpp   |  2 +
 contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp  |  5 ++
 contrib/nnc/passes/soft_backend/ModelAnalyzer.h    |  1 +
 contrib/nnc/passes/soft_backend/SBSerializer.cpp   | 13 +++++
 contrib/nnc/passes/soft_backend/SBSerializer.h     |  1 +
 .../code_snippets/cpp_common_funcs.def             |  5 ++
 .../soft_backend/code_snippets/cpp_operations.def  | 15 +++++
 .../soft_backend/code_snippets/cpp_transpose.def   | 65 ++++++++++++++++++++++
 .../nnc/unittests/soft_backend/CPPOperations.cpp   |  1 +
 23 files changed, 308 insertions(+), 2 deletions(-)
 create mode 100644 contrib/nnc/core/modelIR/operations/TransposeOp.cpp
 create mode 100644 contrib/nnc/include/core/modelIR/operations/TransposeOp.h
 create mode 100644 contrib/nnc/passes/interpreter/ops/Transpose.cpp
 create mode 100644 contrib/nnc/passes/interpreter/ops/Transpose.h
 create mode 100644 contrib/nnc/passes/soft_backend/code_snippets/cpp_transpose.def

diff --git a/contrib/nnc/core/CMakeLists.txt b/contrib/nnc/core/CMakeLists.txt
index 598092d..cc4f450 100644
--- a/contrib/nnc/core/CMakeLists.txt
+++ b/contrib/nnc/core/CMakeLists.txt
@@ -6,6 +6,7 @@ set(SOURCES "modelIR/operations/ConcatOp.cpp"
             "modelIR/operations/PadOp.cpp"
             "modelIR/operations/PoolOp.cpp"
             "modelIR/operations/SqueezeOp.cpp"
+            "modelIR/operations/TransposeOp.cpp"
             "modelIR/Graph.cpp"
             "modelIR/Index.cpp"
             "modelIR/ir_dot_builder.cpp"
diff --git a/contrib/nnc/core/modelIR/IrDotDumper.cpp b/contrib/nnc/core/modelIR/IrDotDumper.cpp
index 558dc05..5e09f69 100644
--- a/contrib/nnc/core/modelIR/IrDotDumper.cpp
+++ b/contrib/nnc/core/modelIR/IrDotDumper.cpp
@@ -253,6 +253,14 @@ void IrDotDumper::visit(ops::ResizeOp& op) {
   dotBuilder.updateWithOp(&op, node_info);
 }
 
+void IrDotDumper::visit(ops::TransposeOp& op) {
+  auto node_info = DotIrNodeInfo().withType("TransposeOp", op.getName())
+                                  .withInShapes(getInputShapes(op))
+                                  .withOutShapes(getOutputShapes(op));
+
+  dotBuilder.updateWithOp(&op, node_info);
+}
+
 } // namespace mir
 } // namespace nnc
 
diff --git a/contrib/nnc/core/modelIR/Operation.cpp b/contrib/nnc/core/modelIR/Operation.cpp
index 90598fb..4952d32 100644
--- a/contrib/nnc/core/modelIR/Operation.cpp
+++ b/contrib/nnc/core/modelIR/Operation.cpp
@@ -38,6 +38,7 @@
#include "core/modelIR/operations/ReshapeOp.h" #include "core/modelIR/operations/PadOp.h" #include "core/modelIR/operations/ReduceFOp.h" +#include "core/modelIR/operations/TransposeOp.h" namespace nnc { namespace mir { diff --git a/contrib/nnc/core/modelIR/operations/TransposeOp.cpp b/contrib/nnc/core/modelIR/operations/TransposeOp.cpp new file mode 100644 index 0000000..663b534 --- /dev/null +++ b/contrib/nnc/core/modelIR/operations/TransposeOp.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "core/modelIR/operations/TransposeOp.h" + +namespace nnc { +namespace mir { +namespace ops { + +TransposeOp::TransposeOp(const IODescriptor& arg, const std::vector& axis_order) + : Operation(Type::transpose, {arg}), _axisOrder(axis_order) { + assert(_axisOrder.size() == static_cast(getInputShape(0).rank())); + inferOutputShapes(); +} + +void TransposeOp::inferOutputShapes() { + auto& input_shape = getInputShape(0); + Shape output_shape; + output_shape.resize(input_shape.rank()); + for (std::size_t i = 0; i < _axisOrder.size(); ++i) + output_shape.dim(i) = input_shape.dim(static_cast(_axisOrder.at(i))); + setOutputShape(0, output_shape); +} + +} // namespace ops +} // namespace mir +} // namespace nnc diff --git a/contrib/nnc/include/core/modelIR/IrDotDumper.h b/contrib/nnc/include/core/modelIR/IrDotDumper.h index 2ab5c3b..cc54ab3 100644 --- a/contrib/nnc/include/core/modelIR/IrDotDumper.h +++ b/contrib/nnc/include/core/modelIR/IrDotDumper.h @@ -41,6 +41,7 @@ #include "core/modelIR/operations/SqueezeOp.h" #include "core/modelIR/operations/PadOp.h" #include "core/modelIR/operations/ReduceFOp.h" +#include "core/modelIR/operations/TransposeOp.h" #include "core/modelIR/ir_dot_builder.h" @@ -78,6 +79,7 @@ public: void visit(ops::SqueezeOp& op) override; void visit(ops::PadOp& op) override; void visit(ops::ReduceFOp& op) override; + void visit(ops::TransposeOp& op) override; void writeDot(std::ostream &os) { dotBuilder.writeDot(os); }; diff --git a/contrib/nnc/include/core/modelIR/operations/ReduceFOp.h b/contrib/nnc/include/core/modelIR/operations/ReduceFOp.h index 2c87673..e247434 100644 --- a/contrib/nnc/include/core/modelIR/operations/ReduceFOp.h +++ b/contrib/nnc/include/core/modelIR/operations/ReduceFOp.h @@ -40,7 +40,7 @@ public: const std::vector& reduce_dims, bool keep_dims, FuncType func_type) - : Operation(Type::reduceFOp, {arg}), _reduceDims(reduce_dims), _keepDims(keep_dims), + : Operation(Type::reduceF, {arg}), _reduceDims(reduce_dims), _keepDims(keep_dims), _funcType(func_type) { // Infer output shapes. const auto& input_shape = getInputShape(0); diff --git a/contrib/nnc/include/core/modelIR/operations/TransposeOp.h b/contrib/nnc/include/core/modelIR/operations/TransposeOp.h new file mode 100644 index 0000000..31f8d58 --- /dev/null +++ b/contrib/nnc/include/core/modelIR/operations/TransposeOp.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _NNC_CORE_IR_MODEL_TRANSPOSE_H_ +#define _NNC_CORE_IR_MODEL_TRANSPOSE_H_ + +#include "core/modelIR/Operation.h" +#include + +namespace nnc { +namespace mir { +namespace ops { + +class TransposeOp : public Operation { +public: + TransposeOp(const IODescriptor& arg, const std::vector& axis_order); + + const std::vector& getAxisOrder() const { return _axisOrder; } + +private: + void inferOutputShapes(); + + std::vector _axisOrder; +}; + +} // namespace ops +} // namespace mir +} // namespace nnc + +#endif //_NNC_CORE_IR_MODEL_TRANSPOSE_H_ diff --git a/contrib/nnc/include/core/modelIR/operations/operations.lst.h b/contrib/nnc/include/core/modelIR/operations/operations.lst.h index c09a1f5..040070b 100644 --- a/contrib/nnc/include/core/modelIR/operations/operations.lst.h +++ b/contrib/nnc/include/core/modelIR/operations/operations.lst.h @@ -40,4 +40,5 @@ HANDLE_OP(deConv2D, DeConv2DOp) HANDLE_OP(ELU, EluOp) HANDLE_OP(squeeze, SqueezeOp) HANDLE_OP(pad, PadOp) -HANDLE_OP(reduceFOp, ReduceFOp) +HANDLE_OP(reduceF, ReduceFOp) +HANDLE_OP(transpose, TransposeOp) diff --git a/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h b/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h index 86b2220..077049b 100644 --- a/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h +++ b/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h @@ -70,6 +70,7 @@ public: void visit(mir::ops::SqueezeOp& op) override; void visit(mir::ops::PadOp& op) override; void visit(mir::ops::ReduceFOp& op) override; + void visit(mir::ops::TransposeOp& op) override; private: using AF = ArtifactFactory; diff --git a/contrib/nnc/include/passes/interpreter/Interpreter.h b/contrib/nnc/include/passes/interpreter/Interpreter.h index ccdb301..ce4d938 100644 --- a/contrib/nnc/include/passes/interpreter/Interpreter.h +++ b/contrib/nnc/include/passes/interpreter/Interpreter.h @@ -59,6 +59,7 @@ public: void visit(ops::SqueezeOp& op) override; void visit(ops::PadOp& op) override; void visit(ops::ReduceFOp& op) override; + void visit(ops::TransposeOp& op) override; void setInput(const std::string &name, const TensorVariant& data); std::vector &getResult(Operation* op); diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp index c04d3c1..3546c32 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp @@ -922,5 +922,9 @@ void AclCppOpGenerator::visit(mir::ops::ReduceFOp& op) { assert(false && "Unimplemented operation: ReduceFOp"); } +void AclCppOpGenerator::visit(mir::ops::TransposeOp& op) { + assert(false && "Unimplemented operation: TransposeOp"); +} + } // namespace nnc diff --git a/contrib/nnc/passes/interpreter/Interpreter.cpp b/contrib/nnc/passes/interpreter/Interpreter.cpp index 4b7e6ff..9938357 100644 --- a/contrib/nnc/passes/interpreter/Interpreter.cpp +++ 
b/contrib/nnc/passes/interpreter/Interpreter.cpp @@ -43,6 +43,7 @@ #include "core/modelIR/operations/ElementwiseOp.h" #include "core/modelIR/operations/SqueezeOp.h" #include "core/modelIR/operations/PadOp.h" +#include "core/modelIR/operations/TransposeOp.h" #include "ops/Bias.h" #include "ops/Concat.h" @@ -54,6 +55,7 @@ #include "ops/Reshape.h" #include "ops/Softmax.h" #include "ops/Scale.h" +#include "ops/Transpose.h" #include "ops/Dropout.h" #include "ops/BatchNorm.h" #include "ops/Pad.h" @@ -339,4 +341,11 @@ void NNInterpreter::visit(ops::ReduceFOp& op) { } } +void NNInterpreter::visit(ops::TransposeOp& op) { + mapByName(&op); + auto operand = op.getPrevNodes()[0]; + auto& input = var(operand.op->getId())[operand.index]; + var(op.getId()) = Transpose(input, op)(); +} + } // namespace nnc diff --git a/contrib/nnc/passes/interpreter/ops/Transpose.cpp b/contrib/nnc/passes/interpreter/ops/Transpose.cpp new file mode 100644 index 0000000..fba948d --- /dev/null +++ b/contrib/nnc/passes/interpreter/ops/Transpose.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Transpose.h" +#include "core/modelIR/Tensor.h" +#include "core/modelIR/ShapeRange.h" + +namespace nnc { + +using namespace mir; + +Transpose::Transpose(const mir::TensorVariant& input, + const mir::ops::TransposeOp& op) : _op(op), _input(input) {} + +std::vector Transpose::operator()() { + auto res = allocate_tensor(_op.getOutputShape(0)); + Tensor res_accessor(res); + + auto& input_shape = _op.getInputShape(0); + auto& axis_order = _op.getAxisOrder(); + std::size_t num_axes = axis_order.size(); + + ShapeRange in_range(input_shape); + Index out_index; + out_index.resize(input_shape.rank()); + + for (auto& in_index : in_range) { + for (std::size_t i = 0; i < num_axes; ++i) + out_index.at(static_cast(i)) = in_index.at(static_cast(axis_order.at(i))); + res_accessor.at(out_index) = _input.at(in_index); + } + + return {res}; +} + +} diff --git a/contrib/nnc/passes/interpreter/ops/Transpose.h b/contrib/nnc/passes/interpreter/ops/Transpose.h new file mode 100644 index 0000000..97879aa --- /dev/null +++ b/contrib/nnc/passes/interpreter/ops/Transpose.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_TRANSPOSE_
+#define _NNC_CORE_BACKEND_INTERPRETER_TRANSPOSE_
+
+#include "OperationImpl.h"
+#include "core/modelIR/operations/TransposeOp.h"
+
+namespace nnc {
+
+class Transpose : public OperationImpl<float> {
+public:
+  std::vector<mir::TensorVariant> operator()() override;
+
+  Transpose(const mir::TensorVariant& input, const mir::ops::TransposeOp& op);
+
+private:
+  const mir::ops::TransposeOp& _op;
+  const mir::Tensor<float> _input;
+};
+
+}
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_TRANSPOSE_
diff --git a/contrib/nnc/passes/soft_backend/CPPGenerator.cpp b/contrib/nnc/passes/soft_backend/CPPGenerator.cpp
index 30565ad..2ed4343 100644
--- a/contrib/nnc/passes/soft_backend/CPPGenerator.cpp
+++ b/contrib/nnc/passes/soft_backend/CPPGenerator.cpp
@@ -45,6 +45,7 @@ using namespace std;
 #include "cpp_tanh.generated.h"
 #include "cpp_elementwise.generated.h"
 #include "cpp_pad.generated.h"
+#include "cpp_transpose.generated.h"
 
 namespace nnc {
 
@@ -287,6 +288,7 @@ void CPPCodeGenerator::materializeCode(ostream &out, const ModelAnalyzer &ma, co
   out.write(cpp_tanh, sizeof(cpp_tanh));
   out.write(cpp_pad, sizeof(cpp_pad));
   out.write(cpp_conv_transpose, sizeof(cpp_conv_transpose));
+  out.write(cpp_transpose, sizeof(cpp_transpose));
   out.write(cpp_operations, sizeof(cpp_operations));
   out.write(cpp_scale, sizeof(cpp_scale));
   out.write(cpp_dropout, sizeof(cpp_dropout));
diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
index d3e459e..ddb73b5 100644
--- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
+++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
@@ -46,6 +46,7 @@
 #include "core/modelIR/operations/SqueezeOp.h"
 #include "core/modelIR/operations/PadOp.h"
 #include "core/modelIR/operations/ReduceFOp.h"
+#include "core/modelIR/operations/TransposeOp.h"
 
 using namespace std;
 
@@ -293,4 +294,8 @@ void ModelAnalyzer::visit(mir::ops::ReduceFOp& op) {
   addOpDescr(&op, "ReduceMean");
 }
 
+void ModelAnalyzer::visit(mir::ops::TransposeOp& op) {
+  addOpDescr(&op, "transpose");
+}
+
 } // namespace nnc
diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.h b/contrib/nnc/passes/soft_backend/ModelAnalyzer.h
index fb1fcf3..0b16711 100644
--- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.h
+++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.h
@@ -112,6 +112,7 @@ public:
   void visit(mir::ops::SqueezeOp& op) override;
   void visit(mir::ops::PadOp& op) override;
   void visit(mir::ops::ReduceFOp& op) override;
+  void visit(mir::ops::TransposeOp& op) override;
 
   /**
    * @return vector of id's of network input tensors
diff --git a/contrib/nnc/passes/soft_backend/SBSerializer.cpp b/contrib/nnc/passes/soft_backend/SBSerializer.cpp
index dba8621..d167924 100644
--- a/contrib/nnc/passes/soft_backend/SBSerializer.cpp
+++ b/contrib/nnc/passes/soft_backend/SBSerializer.cpp
@@ -42,6 +42,7 @@
 #include "core/modelIR/operations/SqueezeOp.h"
 #include "core/modelIR/operations/PadOp.h"
 #include "core/modelIR/operations/ReduceFOp.h"
+#include "core/modelIR/operations/TransposeOp.h"
 
 #include "pass/PassException.h"
 #include
@@ -355,4 +356,16 @@ void Serializer::visit(mir::ops::ReduceFOp& op) {
   serializeShape(op.getOutputShape(0));
 }
 
+void Serializer::visit(mir::ops::TransposeOp& op) {
+  _curOp->_paramStartOffset = _buffer.size();
+  // serialize parameters
+  auto& axis_order = op.getAxisOrder();
+  serializeT<int32_t>(static_cast<int32_t>(axis_order.size()));
+  for (std::size_t i = 0; i < axis_order.size(); ++i) {
+    serializeT<int32_t>(static_cast<int32_t>(axis_order.at(i)));
+  }
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+}
+
 } // namespace nnc
diff --git a/contrib/nnc/passes/soft_backend/SBSerializer.h b/contrib/nnc/passes/soft_backend/SBSerializer.h
index 6d01a27..dcac3cb 100644
--- a/contrib/nnc/passes/soft_backend/SBSerializer.h
+++ b/contrib/nnc/passes/soft_backend/SBSerializer.h
@@ -64,6 +64,7 @@ public:
   void visit(mir::ops::SqueezeOp& op) override;
   void visit(mir::ops::PadOp& op) override;
   void visit(mir::ops::ReduceFOp& op) override;
+  void visit(mir::ops::TransposeOp& op) override;
 
   void serialize(std::list<OpDescr> &inferenceSequence);
 
diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def
index 2f4a271..24f95c3 100644
--- a/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def
+++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def
@@ -526,3 +526,8 @@ inline int Offset(const Dims<4>& dims, int* index) {
 inline int Offset(const RuntimeShape& shape, int* index) {
   return Offset(shape, index[0], index[1], index[2], index[3]);
 }
+
+struct TransposeParams {
+  int8 perm_count;
+  int32 perm[4];
+};
diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def
index e78dc97..c9d7d5a 100644
--- a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def
+++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def
@@ -536,3 +536,18 @@ void pad(Tensor& out, const char* params, const Tensor& in) {
 
   Pad(input, input_dims, left_paddings, right_paddings, output, output_dims);
 }
+
+void transpose(Tensor &out, const char *params, const Tensor &in) {
+  TransposeParams transpose_params;
+  transpose_params.perm_count = deserializeT<int32_t>(params);
+  for (int i = 0; i < transpose_params.perm_count; ++i)
+    transpose_params.perm[i] = deserializeT<int32_t>(params);
+
+  Shape out_s = deserializeShape(params);
+  assert(out_s.getNumElems() == in.getShape().getNumElems());
+  out.reShape(out_s);
+
+  Transpose(transpose_params,
+            shapeToRuntimeShape(in.getShape()), in.getData(),
+            shapeToRuntimeShape(out.getShape()), out.getData());
+}
diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_transpose.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_transpose.def
new file mode 100644
index 0000000..30bd4df
--- /dev/null
+++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_transpose.def
@@ -0,0 +1,65 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+template <typename T>
+void Transpose(const TransposeParams& params,
+               const RuntimeShape& unextended_input_shape, const T* input_data,
+               const RuntimeShape& unextended_output_shape, T* output_data) {
+  const int unextended_output_size = unextended_output_shape.DimensionsCount();
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_size, 4);
+  TFLITE_DCHECK_EQ(unextended_output_size, params.perm_count);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  const int input_ext_size = 4 - unextended_input_shape.DimensionsCount();
+  const int output_ext_size = 4 - unextended_output_size;
+
+  // The perm data is extended to match the output, each index incremented by
+  // the amount of front padding of the input shape.
+  int extended_perm[4];
+  for (int i = 0; i < output_ext_size; ++i) {
+    extended_perm[i] = i;
+  }
+  for (int i = 0; i < unextended_output_size; ++i) {
+    extended_perm[i + output_ext_size] = params.perm[i] + input_ext_size;
+  }
+
+  int out_sizes[4];
+  // Compute the inverse permutation array so we can do an output centered
+  // transpose. Also, check to make sure output_dims is matching input_dims.
+  for (int k = 0; k < 4; k++) {
+    out_sizes[k] = MatchingDim(input_shape, extended_perm[k], output_shape, k);
+  }
+
+  // Naive transpose loop (iterate on output index and compute input index).
+  int o[4];  // loop index (on output).
+  int i[4];
+  for (o[3] = 0; o[3] < out_sizes[3]; o[3]++) {
+    i[extended_perm[3]] = o[3];
+    for (o[2] = 0; o[2] < out_sizes[2]; o[2]++) {
+      i[extended_perm[2]] = o[2];
+      for (o[1] = 0; o[1] < out_sizes[1]; o[1]++) {
+        i[extended_perm[1]] = o[1];
+        for (o[0] = 0; o[0] < out_sizes[0]; o[0]++) {
+          i[extended_perm[0]] = o[0];
+          output_data[Offset(output_shape, o)] =
+              input_data[Offset(input_shape, i)];
+        }
+      }
+    }
+  }
+}
diff --git a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
index 31c30aa..31e649c 100644
--- a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
+++ b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
@@ -40,6 +40,7 @@
 #include "code_snippets/cpp_elementwise.def"
 #include "code_snippets/cpp_tanh.def"
 #include "code_snippets/cpp_pad.def"
+#include "code_snippets/cpp_transpose.def"
 
 #include "CommonData.def"
 #include "code_snippets/cpp_header_types.def"
-- 
2.7.4
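
Note for reviewers: the shape-inference rule added in TransposeOp::inferOutputShapes is simply
"output dimension i takes the size of input dimension axis_order[i]"; the interpreter op and the
generated soft-backend kernel then move each element from its input index to the permuted output
index. Below is a minimal standalone sketch of that rule in plain C++. The Shape alias and the
inferTransposedShape helper are illustrative stand-ins only, not part of the nnc ModelIR API
touched by this patch.

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in for a tensor shape: just a list of dimension sizes.
using Shape = std::vector<int>;

// Mirrors TransposeOp::inferOutputShapes from the patch:
// output dimension i takes the size of input dimension axis_order[i].
Shape inferTransposedShape(const Shape& input, const std::vector<std::size_t>& axis_order) {
  assert(axis_order.size() == input.size());
  Shape output(input.size());
  for (std::size_t i = 0; i < axis_order.size(); ++i)
    output[i] = input[axis_order[i]];
  return output;
}

int main() {
  // NHWC -> NCHW permutation: {1, 224, 224, 3} with axis order {0, 3, 1, 2}.
  Shape in{1, 224, 224, 3};
  Shape out = inferTransposedShape(in, {0, 3, 1, 2});
  for (int d : out)
    std::cout << d << ' ';  // prints: 1 3 224 224
  std::cout << '\n';
  return 0;
}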