Remove unpack simple kernel (#5135)
author Hyeongseok Oh/On-Device Lab(SR)/Staff Engineer/Samsung Electronics <hseok82.oh@samsung.com>
Tue, 7 May 2019 04:43:26 +0000 (13:43 +0900)
committer GitHub Enterprise <noreply-CODE@samsung.com>
Tue, 7 May 2019 04:43:26 +0000 (13:43 +0900)
* Remove unpack simple kernel

Remove the unpack simple kernel (SimpleUnpackLayer) from libs/ARMComputeEx
Use the unstack kernel (CLUnstack) from ACL 19.02 instead
Fix the generated tests to use 3D input (the 4D case has a permutation issue)

Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
* Update comment and remove the change in pack
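
For reference, a minimal sketch of the call pattern this change moves to, assuming the CLUnstack::configure(input, outputs, axis) signature from ACL 19.02; the helper name below is illustrative and not part of this patch:

    #include <vector>

    #include <arm_compute/core/CL/ICLTensor.h>
    #include <arm_compute/runtime/CL/functions/CLUnstack.h>

    // Sketch only: unpack `input` along `axis` into the given output tensors.
    // Replaces the per-slice SubTensor + CLPermute + CLReshapeLayer chain of
    // SimpleUnpackLayer with the single function shipped in ACL.
    // Assumes the CL scheduler/runtime has already been initialized.
    void unpack_with_cl_unstack(arm_compute::ICLTensor *input,
                                const std::vector<arm_compute::ICLTensor *> &outputs,
                                int axis)
    {
      arm_compute::CLUnstack unstack;
      unstack.configure(input, outputs, axis);
      unstack.run();
    }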

libs/ARMComputeEx/arm_compute/runtime/misc/functions/SimpleUnpackLayer.h [deleted file]
libs/ARMComputeEx/src/runtime/misc/functions/SimpleUnpackLayer.cpp [deleted file]
runtimes/pure_arm_compute/src/compilation.cc
tests/nnapi/specs/Ex/unpack_ex_3D_float_1.mod.py
tests/nnapi/specs/Ex/unpack_ex_3D_float_2.mod.py
tests/nnapi/specs/Ex/unpack_ex_3D_int_1.mod.py
tests/nnapi/specs/Ex/unpack_ex_3D_int_2.mod.py

diff --git a/libs/ARMComputeEx/arm_compute/runtime/misc/functions/SimpleUnpackLayer.h b/libs/ARMComputeEx/arm_compute/runtime/misc/functions/SimpleUnpackLayer.h
deleted file mode 100644 (file)
index 62e4dd5..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __UNPACK_LAYER_H__
-#define __UNPACK_LAYER_H__
-
-#include <arm_compute/runtime/CL/CLTensor.h>
-#include <arm_compute/runtime/CL/CLSubTensor.h>
-#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
-#include <arm_compute/runtime/CL/functions/CLPermute.h>
-
-#include "Utils.h"
-
-namespace arm_compute
-{
-namespace misc
-{
-
-class SimpleUnpackLayer : public arm_compute::IFunction
-{
-public:
-  SimpleUnpackLayer(void)
-      : _cl_permuted_vector{}, _output_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{},
-        _cl_permute_vector{}, _input(nullptr), _axis(0)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void configure(arm_compute::ICLTensor *input,
-                 const std::vector<arm_compute::ICLTensor *> &output_vector, uint32_t axis);
-
-public:
-  void run(void) override;
-
-private:
-  std::vector<arm_compute::CLTensor> _cl_permuted_vector;
-  std::vector<arm_compute::ICLTensor *> _output_vector;
-  std::vector<std::shared_ptr<arm_compute::CLSubTensor>> _sub_tensor_vector;
-  std::vector<arm_compute::CLReshapeLayer> _cl_reshape_vector;
-  std::vector<arm_compute::CLPermute> _cl_permute_vector;
-  arm_compute::ICLTensor *_input;
-  uint32_t _axis;
-};
-
-} // namespace misc
-} // namespace arm_compute
-
-#endif // __UNPACK_LAYER_H__
diff --git a/libs/ARMComputeEx/src/runtime/misc/functions/SimpleUnpackLayer.cpp b/libs/ARMComputeEx/src/runtime/misc/functions/SimpleUnpackLayer.cpp
deleted file mode 100644 (file)
index c5374cc..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "arm_compute/runtime/misc/functions/SimpleUnpackLayer.h"
-
-namespace arm_compute
-{
-namespace misc
-{
-
-void SimpleUnpackLayer::configure(arm_compute::ICLTensor *input,
-                                  const std::vector<arm_compute::ICLTensor *> &output_vector,
-                                  uint32_t axis)
-{
-  uint32_t nr_outputs = output_vector.size();
-  _cl_permuted_vector.resize(nr_outputs);
-  _cl_permute_vector.resize(nr_outputs);
-  uint32_t input_rank = input->info()->num_dimensions();
-  const arm_compute::PermutationVector pv{2, 0, 1};
-  _input = input;
-  _axis = axis;
-  _cl_reshape_vector.resize(nr_outputs);
-
-  arm_compute::TensorShape subTensor_shape{};
-  for (uint32_t i = 0; i < input_rank; i++)
-  {
-    if (i != _axis)
-    {
-      subTensor_shape.set(i, _input->info()->tensor_shape()[i]);
-    }
-    else
-    {
-      subTensor_shape.set(i, 1);
-    }
-  }
-
-  auto subTensor_offset = arm_compute::Coordinates{};
-  subTensor_offset.set_num_dimensions(input_rank);
-
-  for (size_t i = 0; i < output_vector.size(); i++)
-  {
-    _output_vector.push_back(output_vector[i]);
-    subTensor_offset[_axis] = i;
-    auto temp_tensor = std::make_shared<arm_compute::CLSubTensor>(CAST_CL(_input), subTensor_shape,
-                                                                  subTensor_offset, true);
-    _sub_tensor_vector.push_back(temp_tensor);
-    // Copies into the subtensor
-    _cl_permute_vector[i].configure(_sub_tensor_vector[i].get(), &_cl_permuted_vector[i], pv);
-    _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], CAST_CL(_output_vector[i]));
-    _cl_permuted_vector[i].allocator()->allocate();
-  }
-}
-
-void SimpleUnpackLayer::run(void)
-{
-  for (size_t i = 0; i < _output_vector.size(); i++)
-  {
-    _cl_permute_vector[i].run();
-    _cl_reshape_vector[i].run();
-  }
-}
-
-} // namespace misc
-} // namespace arm_compute
diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index 4506992..a6ef8a4 100644 (file)
@@ -52,7 +52,6 @@
 #include <arm_compute/runtime/misc/functions/GenericReshapeLayer.h>
 #include <arm_compute/runtime/misc/functions/GenericFullyConnectedLayer.h>
 #include <arm_compute/runtime/misc/functions/SimplePackLayer.h>
-#include <arm_compute/runtime/misc/functions/SimpleUnpackLayer.h>
 #include <arm_compute/runtime/misc/functions/GenericGather.h>
 
 #include "misc/matrix/IndexIterator.h"
@@ -5180,6 +5179,11 @@ void Planner::visit(const ::internal::tflite::op::Unpack::Node &node)
 
   if (input_rank == 4)
   {
+    // TODO: Generate a test case for this and generalize the 4D method to all cases.
+    throw std::runtime_error("UNPACK_4D not implemented");
+  }
+  else if (input_rank == 3)
+  {
     Param param;
     param.ifm_index = ifm_index.asInt();
     param.axis = axis_uint;
@@ -5193,7 +5197,7 @@ void Planner::visit(const ::internal::tflite::op::Unpack::Node &node)
 
       if (::internal::arm_compute::isGpuMode())
       {
-        auto fn = nnfw::cpp14::make_unique<SimpleUnpackLayer>();
+        auto fn = nnfw::cpp14::make_unique<::arm_compute::CLUnstack>();
         std::vector<::arm_compute::ICLTensor *> outputs;
         for (const auto &index : param.ofm_indexes)
         {
@@ -5210,11 +5214,6 @@ void Planner::visit(const ::internal::tflite::op::Unpack::Node &node)
 
     _builder.addStage(stage);
   }
-  else if (input_rank == 3)
-  {
-    // TODO: generate test case for this and generalize 4D method all cases.
-    throw std::runtime_error("UNPACK_3D not implemented");
-  }
   else if (input_rank == 2)
   {
     throw std::runtime_error("UNPACK_2D not implemented");
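
The configure call on the new function falls outside the hunks shown above. A hedged sketch of how the visible pieces presumably fit together in the GPU branch; `input_tensor` and the output lookup are placeholders, and only the CLUnstack type and the param fields come from this diff:

    // Illustrative only -- tensor lookup goes through the pure_arm_compute
    // allocation context, which this diff does not show.
    auto fn = nnfw::cpp14::make_unique<::arm_compute::CLUnstack>();

    std::vector<::arm_compute::ICLTensor *> outputs;
    for (const auto &index : param.ofm_indexes)
    {
      outputs.push_back(lookup_cl_tensor(index)); // placeholder for the real lookup
    }

    // Assumed ACL 19.02 signature: configure(const ICLTensor *, const std::vector<ICLTensor *> &, int)
    fn->configure(input_tensor, outputs, param.axis);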
diff --git a/tests/nnapi/specs/Ex/unpack_ex_3D_float_1.mod.py b/tests/nnapi/specs/Ex/unpack_ex_3D_float_1.mod.py
index e5cb38e..7e8ef60 100644 (file)
@@ -1,11 +1,11 @@
 # Sample UnPack model, axis = 0
 model = Model()
-input = Input("input", "TENSOR_FLOAT32", "{3, 2, 3, 4}")
+input = Input("input", "TENSOR_FLOAT32", "{3, 6, 4}")
 axis = Int32Scalar("axis", 0)
 num_splits = Int32Scalar("num_splits", 3)
-out1 = Output("output1", "TENSOR_FLOAT32", "{2, 3, 4}")
-out2 = Output("output2", "TENSOR_FLOAT32", "{2, 3, 4}")
-out3 = Output("output3", "TENSOR_FLOAT32", "{2, 3, 4}")
+out1 = Output("output1", "TENSOR_FLOAT32", "{6, 4}")
+out2 = Output("output2", "TENSOR_FLOAT32", "{6, 4}")
+out3 = Output("output3", "TENSOR_FLOAT32", "{6, 4}")
 model = model.Operation("UNPACK_EX", input, num_splits, axis).To([out1, out2, out3])
 
 input0 = {input: # input 0
diff --git a/tests/nnapi/specs/Ex/unpack_ex_3D_float_2.mod.py b/tests/nnapi/specs/Ex/unpack_ex_3D_float_2.mod.py
index 1d1045a..ed7800a 100644 (file)
@@ -1,11 +1,11 @@
-# Sample UnPack model, axis = 2
+# Sample UnPack model, axis = 1
 model = Model()
-input = Input("input", "TENSOR_FLOAT32", "{3, 2, 3, 4}")
-axis = Int32Scalar("axis", 2)
+input = Input("input", "TENSOR_FLOAT32", "{6, 3, 4}")
+axis = Int32Scalar("axis", 1)
 num_splits = Int32Scalar("num_splits", 3)
-out1 = Output("output1", "TENSOR_FLOAT32", "{3, 2, 4}")
-out2 = Output("output2", "TENSOR_FLOAT32", "{3, 2, 4}")
-out3 = Output("output3", "TENSOR_FLOAT32", "{3, 2, 4}")
+out1 = Output("output1", "TENSOR_FLOAT32", "{6, 4}")
+out2 = Output("output2", "TENSOR_FLOAT32", "{6, 4}")
+out3 = Output("output3", "TENSOR_FLOAT32", "{6, 4}")
 model = model.Operation("UNPACK_EX", input, num_splits, axis).To([out1, out2, out3])
 
 input0 = {input: # input 0
diff --git a/tests/nnapi/specs/Ex/unpack_ex_3D_int_1.mod.py b/tests/nnapi/specs/Ex/unpack_ex_3D_int_1.mod.py
index 2a668cf..34e153b 100644 (file)
@@ -1,11 +1,11 @@
 # Sample UnPack model, axis = 0
 model = Model()
-input = Input("input", "TENSOR_INT32", "{3, 2, 3, 4}")
+input = Input("input", "TENSOR_INT32", "{3, 6, 4}")
 axis = Int32Scalar("axis", 0)
 num_splits = Int32Scalar("num_splits", 3)
-out1 = Output("output1", "TENSOR_INT32", "{2, 3, 4}")
-out2 = Output("output2", "TENSOR_INT32", "{2, 3, 4}")
-out3 = Output("output3", "TENSOR_INT32", "{2, 3, 4}")
+out1 = Output("output1", "TENSOR_INT32", "{6, 4}")
+out2 = Output("output2", "TENSOR_INT32", "{6, 4}")
+out3 = Output("output3", "TENSOR_INT32", "{6, 4}")
 model = model.Operation("UNPACK_EX", input, num_splits, axis).To([out1, out2, out3])
 
 input0 = {input: # input 0
diff --git a/tests/nnapi/specs/Ex/unpack_ex_3D_int_2.mod.py b/tests/nnapi/specs/Ex/unpack_ex_3D_int_2.mod.py
index 1159549..db51351 100644 (file)
@@ -1,11 +1,11 @@
-# Sample UnPack model, axis = 2
+# Sample UnPack model, axis = 1
 model = Model()
-input = Input("input", "TENSOR_INT32", "{3, 2, 3, 4}")
-axis = Int32Scalar("axis", 2)
+input = Input("input", "TENSOR_INT32", "{6, 3, 4}")
+axis = Int32Scalar("axis", 1)
 num_splits = Int32Scalar("num_splits", 3)
-out1 = Output("output1", "TENSOR_INT32", "{3, 2, 4}")
-out2 = Output("output2", "TENSOR_INT32", "{3, 2, 4}")
-out3 = Output("output3", "TENSOR_INT32", "{3, 2, 4}")
+out1 = Output("output1", "TENSOR_INT32", "{6, 4}")
+out2 = Output("output2", "TENSOR_INT32", "{6, 4}")
+out3 = Output("output3", "TENSOR_INT32", "{6, 4}")
 model = model.Operation("UNPACK_EX", input, num_splits, axis).To([out1, out2, out3])
 
 input0 = {input: # input 0