[newrt] Implement Concat kernel for CPU (#1924)
author김수진/동작제어Lab(SR)/Engineer/삼성전자 <sjsujin.kim@samsung.com>
Tue, 10 Jul 2018 23:46:57 +0000 (08:46 +0900)
committer박세희/동작제어Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>
Tue, 10 Jul 2018 23:46:57 +0000 (08:46 +0900)
Related : #1860

This commit implements the `Concat` kernel for the CPU backend in the new runtime.

Signed-off-by: sjsujinkim <sjsujin.kim@samsung.com>
runtimes/new_runtime/src/compilation.cc
runtimes/new_runtime/src/internal/cpu/StageGenerator.cc
runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.cc [new file with mode: 0644]
runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.h [new file with mode: 0644]
runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.cc
runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.h

index 8f721de..7cf6044 100644 (file)
@@ -397,6 +397,7 @@ void TensorMarker::visit(const ::internal::tflite::op::AvgPool2D::implicit::Node
 void TensorMarker::visit(const ::internal::tflite::op::Concat::Node &node)
 {
   const auto &param = node.param();
+  mark(param.ofm_index);
   for (auto ind : param.ifm_indexes)
   {
     mark(ind);
index d943980..9f5042b 100644 (file)
@@ -6,6 +6,7 @@
 #include "internal/kernels/cpufallback/ConvolutionLayer.h"
 #include "internal/kernels/cpufallback/AvgPoolLayer.h"
 #include "internal/kernels/cpufallback/MaxPoolLayer.h"
+#include "internal/kernels/cpufallback/ConcatLayer.h"
 
 #include "logging.h"
 
@@ -306,7 +307,56 @@ Stage StageGenerator::generate(const ::internal::tflite::op::AvgPool2D::implicit
 
 Stage StageGenerator::generate(const ::internal::tflite::op::Concat::Node &node)
 {
-  throw std::runtime_error("NYI");
+  VERBOSE(Concat) << "generate CPU Concat" << std::endl;
+
+  const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+  const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+  // Plain-value snapshot of everything the deferred stage needs; the returned
+  // lambda must not reference `node` or `_ctx`, which may not outlive this call.
+  struct Param
+  {
+    int32_t output_index;
+    std::vector<int32_t> input_indexes;
+
+    int32_t axis;
+
+    ::internal::tflite::operand::Shape ofm_shape{1};
+    std::vector<::internal::tflite::operand::Shape> ifm_shapes;
+  };
+
+  Param param;
+
+  param.output_index = node.param().ofm_index;
+  param.input_indexes = node.param().ifm_indexes;
+  // Concat axis is read here, at generation time, from a scalar operand.
+  param.axis = _ctx.at(axis_index).asScalar<int32_t>();
+
+  param.ofm_shape = _ctx.at(ofm_index).shape();
+
+  // Record the shape of every input feature map, in operand order.
+  for (auto ifm_ind : node.param().ifm_indexes)
+  {
+    const ::internal::tflite::operand::Index ifm_index{ifm_ind};
+    param.ifm_shapes.emplace_back(_ctx.at(ifm_index).shape());
+  }
+
+  // Captured by value below so the stage can resolve tensors when it runs.
+  auto tensors = _tensor_builder;
+
+  return [tensors, param](IExecutionBuilder &builder) {
+    // Buffers are looked up at execution time, after tensors are allocated.
+    auto output_alloc = tensors->at(::internal::tflite::operand::Index{param.output_index}).get();
+
+    std::vector<const uint8_t *> input_buffers;
+    for (auto ifm_ind : param.input_indexes)
+    {
+      input_buffers.emplace_back(
+          tensors->at(::internal::tflite::operand::Index{ifm_ind}).get()->buffer());
+    }
+
+    std::unique_ptr<::internal::kernels::cpu::ConcatLayer> fn{
+        new ::internal::kernels::cpu::ConcatLayer};
+
+    fn->configure(input_buffers, param.ifm_shapes, param.axis, output_alloc->buffer(),
+                  param.ofm_shape);
+
+    builder.append(std::move(fn));
+  };
 }
 
 Stage StageGenerator::generate(const ::internal::tflite::op::FullyConnected::Node &node)
diff --git a/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.cc b/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.cc
new file mode 100644 (file)
index 0000000..651fc4b
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "internal/kernels/cpufallback/OperationUtils.h"
+
+namespace internal
+{
+namespace kernels
+{
+namespace cpu
+{
+
+bool ConcatLayer::concatenationFloat32()
+{
+  int num_inputs = _inputShapes.size();
+  std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
+  std::vector<::tflite::Dims<4>> inputDims(num_inputs);
+  for (int i = 0; i < num_inputs; i++)
+  {
+    inputDims[i] = convertShapeToDims(_inputShapes[i]);
+    inputDimsPtr[i] = &inputDims[i];
+  }
+
+  std::vector<const float *> inputFloatPtrs;
+
+  for (auto ptr : _inputDataPtrs)
+  {
+    inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
+  }
+
+  ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, float>(
+      getNumberOfDimensions(_outputShape) - _axis - 1, inputFloatPtrs.data(), inputDimsPtr.data(),
+      num_inputs, reinterpret_cast<float *>(_outputData), convertShapeToDims(_outputShape));
+  return true;
+}
+bool ConcatLayer::concatenationQuant8()
+{
+  int num_inputs = _inputShapes.size();
+  std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
+  std::vector<::tflite::Dims<4>> inputDims(num_inputs);
+  for (int i = 0; i < num_inputs; i++)
+  {
+    inputDims[i] = convertShapeToDims(_inputShapes[i]);
+    inputDimsPtr[i] = &inputDims[i];
+  }
+  ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, uint8_t>(
+      getNumberOfDimensions(_outputShape) - _axis - 1, _inputDataPtrs.data(), inputDimsPtr.data(),
+      num_inputs, _outputData, convertShapeToDims(_outputShape));
+  return true;
+}
+
+void ConcatLayer::configure(const std::vector<const uint8_t *> &inputDataPtrs,
+                            const std::vector<internal::tflite::operand::Shape> &inputShapes,
+                            int32_t axis, uint8_t *outputData,
+                            const internal::tflite::operand::Shape outputShape)
+{
+  _inputDataPtrs = inputDataPtrs;
+
+  for (auto shape : inputShapes)
+  {
+    _inputShapes.emplace_back(convertShape(shape));
+    _inputType = shape.type();
+  }
+
+  _axis = axis;
+
+  _outputData = outputData;
+  _outputShape = convertShape(outputShape);
+}
+
+// Dispatches to the type-specific concatenation based on the operand type
+// recorded by configure().
+void ConcatLayer::run()
+{
+  if (_inputType == static_cast<uint32_t>(OperandType::TENSOR_FLOAT32))
+  {
+    concatenationFloat32();
+  }
+  else if (_inputType == static_cast<uint32_t>(OperandType::TENSOR_QUANT8_ASYMM))
+  {
+    concatenationQuant8();
+  }
+  // NOTE(review): any other operand type is silently ignored here, leaving the
+  // output buffer untouched — consider reporting an error instead.
+}
+
+} // namespace cpu
+} // namespace kernels
+} // namespace internal
diff --git a/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.h b/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.h
new file mode 100644 (file)
index 0000000..569d4d8
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_KERNELS_CPU_CONCATLAYER_H__
+#define __INTERNAL_KERNELS_CPU_CONCATLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "internal/Model.h"
+#include "internal/kernels/cpufallback/OperationUtils.h"
+
+using namespace internal::kernels::cpu;
+
+namespace internal
+{
+namespace kernels
+{
+namespace cpu
+{
+
+class ConcatLayer : public ::arm_compute::IFunction
+{
+public:
+  ConcatLayer() {}
+
+public:
+  bool concatenationFloat32();
+
+  bool concatenationQuant8();
+
+  void configure(const std::vector<const uint8_t *> &inputDataPtrs,
+                 const std::vector<internal::tflite::operand::Shape> &inputShapes, int32_t axis,
+                 uint8_t *outputData, const internal::tflite::operand::Shape outputShape);
+
+  void run();
+
+private:
+  std::vector<const uint8_t *> _inputDataPtrs;
+  uint8_t *_outputData;
+
+  int32_t _axis;
+
+  std::vector<Shape> _inputShapes;
+  Shape _outputShape;
+
+  int32_t _inputType;
+};
+
+} // namespace cpu
+} // namespace kernels
+} // namespace internal
+
+#endif // __INTERNAL_KERNELS_CPU_CONCATLAYER_H__
index 322fd80..685b386 100644 (file)
@@ -10,6 +10,8 @@ namespace kernels
 namespace cpu
 {
 
+// Returns the rank (number of dimensions) of the given shape.
+uint32_t getNumberOfDimensions(const Shape &shape)
+{
+  return static_cast<uint32_t>(shape.dimensions.size());
+}
+
 uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx)
 {
   if (dimensionIdx >= shape.dimensions.size())
index 0c75199..b2ab60b 100644 (file)
@@ -37,6 +37,8 @@ struct Shape
   int32_t offset;
 };
 
+uint32_t getNumberOfDimensions(const Shape &shape);
+
 uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx);
 
 inline ::tflite::Dims<4> convertShapeToDims(const Shape &shape)