From: 김수진/동작제어Lab(SR)/Engineer/삼성전자
Date: Tue, 10 Jul 2018 23:46:57 +0000 (+0900)
Subject: [newrt] Implement Concat kernel for CPU (#1924)
X-Git-Tag: 0.2~472
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=32784776449ccd2b705f221910570a394b77eeb0;p=platform%2Fcore%2Fml%2Fnnfw.git

[newrt] Implement Concat kernel for CPU (#1924)

Related: #1860

This commit implements the `Concat` kernel for CPU in the new runtime.

Signed-off-by: sjsujinkim
---

diff --git a/runtimes/new_runtime/src/compilation.cc b/runtimes/new_runtime/src/compilation.cc
index 8f721de..7cf6044 100644
--- a/runtimes/new_runtime/src/compilation.cc
+++ b/runtimes/new_runtime/src/compilation.cc
@@ -397,6 +397,7 @@ void TensorMarker::visit(const ::internal::tflite::op::AvgPool2D::implicit::Node
 void TensorMarker::visit(const ::internal::tflite::op::Concat::Node &node)
 {
   const auto &param = node.param();
+  mark(param.ofm_index);
   for (auto ind : param.ifm_indexes)
   {
     mark(ind);
diff --git a/runtimes/new_runtime/src/internal/cpu/StageGenerator.cc b/runtimes/new_runtime/src/internal/cpu/StageGenerator.cc
index d943980..9f5042b 100644
--- a/runtimes/new_runtime/src/internal/cpu/StageGenerator.cc
+++ b/runtimes/new_runtime/src/internal/cpu/StageGenerator.cc
@@ -6,6 +6,7 @@
 #include "internal/kernels/cpufallback/ConvolutionLayer.h"
 #include "internal/kernels/cpufallback/AvgPoolLayer.h"
 #include "internal/kernels/cpufallback/MaxPoolLayer.h"
+#include "internal/kernels/cpufallback/ConcatLayer.h"
 
 #include "logging.h"
 
@@ -306,7 +307,56 @@ Stage StageGenerator::generate(const ::internal::tflite::op::AvgPool2D::implicit
 
 Stage StageGenerator::generate(const ::internal::tflite::op::Concat::Node &node)
 {
-  throw std::runtime_error("NYI");
+  VERBOSE(Concat) << "generate CPU Concat" << std::endl;
+
+  const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+  const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+  struct Param
+  {
+    int32_t output_index;
+    std::vector<int32_t> input_indexes;
+
+    int32_t axis;
+
+    ::internal::tflite::operand::Shape ofm_shape{1};
+    std::vector<::internal::tflite::operand::Shape> ifm_shapes;
+  };
+
+  Param param;
+
+  param.output_index = node.param().ofm_index;
+  param.input_indexes = node.param().ifm_indexes;
+  param.axis = _ctx.at(axis_index).asScalar<int32_t>();
+
+  param.ofm_shape = _ctx.at(ofm_index).shape();
+
+  for (auto ifm_ind : node.param().ifm_indexes)
+  {
+    const ::internal::tflite::operand::Index ifm_index{ifm_ind};
+    param.ifm_shapes.emplace_back(_ctx.at(ifm_index).shape());
+  }
+
+  auto tensors = _tensor_builder;
+
+  return [tensors, param](IExecutionBuilder &builder) {
+    auto output_alloc = tensors->at(::internal::tflite::operand::Index{param.output_index}).get();
+
+    std::vector<const uint8_t *> input_buffers;
+    for (auto ifm_ind : param.input_indexes)
+    {
+      input_buffers.emplace_back(
+          tensors->at(::internal::tflite::operand::Index{ifm_ind}).get()->buffer());
+    }
+
+    std::unique_ptr<::internal::kernels::cpu::ConcatLayer> fn{
+        new ::internal::kernels::cpu::ConcatLayer};
+
+    fn->configure(input_buffers, param.ifm_shapes, param.axis, output_alloc->buffer(),
+                  param.ofm_shape);
+
+    builder.append(std::move(fn));
+  };
 }
 
 Stage StageGenerator::generate(const ::internal::tflite::op::FullyConnected::Node &node)
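The hunk above follows the runtime's stage pattern: generate() captures `param` and the tensor builder into a closure, and the `ConcatLayer` itself is only constructed when the closure is invoked with an `IExecutionBuilder` while the execution plan is assembled. Below is a minimal, self-contained sketch of that deferral pattern; the names (`Kernel`, `ConcatKernel`, `Stage`, `makeConcatStage`) are illustrative stand-ins, not the runtime's actual types.

#include <functional>
#include <iostream>
#include <memory>
#include <vector>

// Illustrative stand-ins for the runtime's kernel and execution-plan types.
struct Kernel
{
  virtual ~Kernel() = default;
  virtual void run() = 0;
};

struct ConcatKernel : Kernel
{
  explicit ConcatKernel(int axis) : _axis{axis} {}
  void run() override { std::cout << "concat along axis " << _axis << std::endl; }
  int _axis;
};

// A "stage" is just a closure: created while compiling the model, it appends
// its kernel to the plan only when invoked later.
using Stage = std::function<void(std::vector<std::unique_ptr<Kernel>> &)>;

Stage makeConcatStage(int axis)
{
  return [axis](std::vector<std::unique_ptr<Kernel>> &plan) {
    plan.emplace_back(new ConcatKernel{axis});
  };
}

int main()
{
  std::vector<std::unique_ptr<Kernel>> plan;
  auto stage = makeConcatStage(3); // no kernel object exists yet
  stage(plan);                     // the kernel is built here
  for (const auto &kernel : plan)
  {
    kernel->run();
  }
  return 0;
}
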
diff --git a/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.cc b/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.cc
new file mode 100644
index 0000000..651fc4b
--- /dev/null
+++ b/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "internal/kernels/cpufallback/OperationUtils.h"
+
+namespace internal
+{
+namespace kernels
+{
+namespace cpu
+{
+
+bool ConcatLayer::concatenationFloat32()
+{
+  int num_inputs = _inputShapes.size();
+  std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
+  std::vector<::tflite::Dims<4>> inputDims(num_inputs);
+  for (int i = 0; i < num_inputs; i++)
+  {
+    inputDims[i] = convertShapeToDims(_inputShapes[i]);
+    inputDimsPtr[i] = &inputDims[i];
+  }
+
+  std::vector<const float *> inputFloatPtrs;
+
+  for (auto ptr : _inputDataPtrs)
+  {
+    inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
+  }
+
+  ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, float>(
+      getNumberOfDimensions(_outputShape) - _axis - 1, inputFloatPtrs.data(), inputDimsPtr.data(),
+      num_inputs, reinterpret_cast<float *>(_outputData), convertShapeToDims(_outputShape));
+  return true;
+}
+bool ConcatLayer::concatenationQuant8()
+{
+  int num_inputs = _inputShapes.size();
+  std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
+  std::vector<::tflite::Dims<4>> inputDims(num_inputs);
+  for (int i = 0; i < num_inputs; i++)
+  {
+    inputDims[i] = convertShapeToDims(_inputShapes[i]);
+    inputDimsPtr[i] = &inputDims[i];
+  }
+  ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, uint8_t>(
+      getNumberOfDimensions(_outputShape) - _axis - 1, _inputDataPtrs.data(), inputDimsPtr.data(),
+      num_inputs, _outputData, convertShapeToDims(_outputShape));
+  return true;
+}
+
+void ConcatLayer::configure(const std::vector<const uint8_t *> &inputDataPtrs,
+                            const std::vector<internal::tflite::operand::Shape> &inputShapes,
+                            int32_t axis, uint8_t *outputData,
+                            const internal::tflite::operand::Shape outputShape)
+{
+  _inputDataPtrs = inputDataPtrs;
+
+  for (auto shape : inputShapes)
+  {
+    _inputShapes.emplace_back(convertShape(shape));
+    _inputType = shape.type();
+  }
+
+  _axis = axis;
+
+  _outputData = outputData;
+  _outputShape = convertShape(outputShape);
+}
+
+void ConcatLayer::run()
+{
+  if (_inputType == static_cast<int32_t>(OperandType::TENSOR_FLOAT32))
+  {
+    concatenationFloat32();
+  }
+  else if (_inputType == static_cast<int32_t>(OperandType::TENSOR_QUANT8_ASYMM))
+  {
+    concatenationQuant8();
+  }
+}
+
+} // namespace cpu
+} // namespace kernels
+} // namespace internal
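One subtlety in the kernel above: TFLite's legacy `optimized_ops::Concatenation` takes the concatenation dimension in `::tflite::Dims<4>` order, which stores dimensions innermost-first, so the layer passes `getNumberOfDimensions(_outputShape) - _axis - 1` rather than `_axis` itself. A tiny standalone sketch of that index mapping (`toReversedAxis` is an illustrative helper, not part of this patch):

#include <cstdint>
#include <iostream>

// Maps an axis given in regular (outermost-first, e.g. NHWC) order to the
// reversed, innermost-first order used by ::tflite::Dims<4>.
int32_t toReversedAxis(uint32_t rank, int32_t axis)
{
  return static_cast<int32_t>(rank) - axis - 1;
}

int main()
{
  // Concatenating 4-D NHWC tensors along the channel axis (3) corresponds to
  // reversed axis 0, the innermost dimension of Dims<4>.
  std::cout << toReversedAxis(4, 3) << std::endl; // prints 0
  std::cout << toReversedAxis(4, 0) << std::endl; // batch axis -> prints 3
  return 0;
}
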
diff --git a/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.h b/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.h
new file mode 100644
index 0000000..569d4d8
--- /dev/null
+++ b/runtimes/new_runtime/src/internal/kernels/cpufallback/ConcatLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_KERNELS_CPU_CONCATLAYER_H__
+#define __INTERNAL_KERNELS_CPU_CONCATLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "internal/Model.h"
+#include "internal/kernels/cpufallback/OperationUtils.h"
+
+using namespace internal::kernels::cpu;
+
+namespace internal
+{
+namespace kernels
+{
+namespace cpu
+{
+
+class ConcatLayer : public ::arm_compute::IFunction
+{
+public:
+  ConcatLayer() {}
+
+public:
+  bool concatenationFloat32();
+
+  bool concatenationQuant8();
+
+  void configure(const std::vector<const uint8_t *> &inputDataPtrs,
+                 const std::vector<internal::tflite::operand::Shape> &inputShapes, int32_t axis,
+                 uint8_t *outputData, const internal::tflite::operand::Shape outputShape);
+
+  void run();
+
+private:
+  std::vector<const uint8_t *> _inputDataPtrs;
+  uint8_t *_outputData;
+
+  int32_t _axis;
+
+  std::vector<Shape> _inputShapes;
+  Shape _outputShape;
+
+  int32_t _inputType;
+};
+
+} // namespace cpu
+} // namespace kernels
+} // namespace internal
+
+#endif // __INTERNAL_KERNELS_CPU_CONCATLAYER_H__
diff --git a/runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.cc b/runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.cc
index 322fd80..685b386 100644
--- a/runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.cc
+++ b/runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.cc
@@ -10,6 +10,8 @@ namespace kernels
 namespace cpu
 {
 
+uint32_t getNumberOfDimensions(const Shape &shape) { return shape.dimensions.size(); }
+
 uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx)
 {
   if (dimensionIdx >= shape.dimensions.size())
diff --git a/runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.h b/runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.h
index 0c75199..b2ab60b 100644
--- a/runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.h
+++ b/runtimes/new_runtime/src/internal/kernels/cpufallback/OperationUtils.h
@@ -37,6 +37,8 @@ struct Shape
   int32_t offset;
 };
 
+uint32_t getNumberOfDimensions(const Shape &shape);
+
 uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx);
 
 inline ::tflite::Dims<4> convertShapeToDims(const Shape &shape)
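For reference, the operation the new kernel delegates to TFLite is plain axis-wise concatenation. A naive, self-contained sketch of the same semantics for the innermost axis, independent of the patch and using only illustrative names:

#include <cstddef>
#include <iostream>
#include <vector>

// Concatenates several tensors along their innermost axis. Every input shares
// the same outer extent; only the innermost extent may differ.
std::vector<float> concatInnermost(const std::vector<std::vector<float>> &inputs, size_t outer,
                                   const std::vector<size_t> &inner)
{
  std::vector<float> output;
  for (size_t o = 0; o < outer; ++o)
  {
    for (size_t i = 0; i < inputs.size(); ++i)
    {
      const auto begin = inputs[i].begin() + o * inner[i];
      output.insert(output.end(), begin, begin + inner[i]);
    }
  }
  return output;
}

int main()
{
  // Two tensors of shape {2, 2} and {2, 3}, concatenated along the last axis
  // into shape {2, 5}.
  std::vector<std::vector<float>> inputs{{1, 2, 3, 4}, {5, 6, 7, 8, 9, 10}};
  const auto out = concatInnermost(inputs, 2, {2, 3});
  for (float v : out)
  {
    std::cout << v << " "; // 1 2 5 6 7 3 4 8 9 10
  }
  std::cout << std::endl;
  return 0;
}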