From 0a89042ad279bfb6786c36885604a8fe519a7e9b Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EA=B9=80=EC=88=98=EC=A7=84/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84?= =?utf8?q?=EC=9E=90?= Date: Thu, 13 Dec 2018 13:02:25 +0900 Subject: [PATCH] [neurun] Use IObject in PermuteLayer (#4005) This commit uses `IObject` instead of `ITensor` in `PermuteLayer`, since we don't want to use `map/unmap` functions explicitly or to get any `ACL` dependencies. Related : #3279 Part of : #3873 Signed-off-by: sjsujinkim --- .../neurun/src/backend/acl_cl/TensorBuilder.cc | 13 +- runtimes/neurun/src/backend/acl_cl/TensorBuilder.h | 4 + runtimes/neurun/src/backend/cpu/StageGenerator.cc | 6 +- runtimes/neurun/src/backend/cpu/TensorBuilder.cc | 9 +- runtimes/neurun/src/backend/cpu/TensorBuilder.h | 2 + runtimes/neurun/src/kernel/cpu/PermuteLayer.cc | 272 +++++++++++---------- runtimes/neurun/src/kernel/cpu/PermuteLayer.h | 12 +- 7 files changed, 173 insertions(+), 145 deletions(-) diff --git a/runtimes/neurun/src/backend/acl_cl/TensorBuilder.cc b/runtimes/neurun/src/backend/acl_cl/TensorBuilder.cc index e902071..b04f81c 100644 --- a/runtimes/neurun/src/backend/acl_cl/TensorBuilder.cc +++ b/runtimes/neurun/src/backend/acl_cl/TensorBuilder.cc @@ -178,13 +178,20 @@ TensorBuilder::tensorAt(const graph::operand::Index &ind) std::shared_ptr TensorBuilder::wrapTensor(const graph::operand::Index &ind) { - if (_tensors.find(ind) != _tensors.end()) + if (_objects.find(ind) != _objects.end()) { - return std::make_shared(_tensors.at(ind)); + return _objects.at(ind); } else { - return std::make_shared(_subtensors.at(ind)); + if (_tensors.find(ind) != _tensors.end()) + { + return _objects[ind] = std::make_shared(_tensors.at(ind)); + } + else + { + return _objects[ind] = std::make_shared(_subtensors.at(ind)); + } } } diff --git a/runtimes/neurun/src/backend/acl_cl/TensorBuilder.h b/runtimes/neurun/src/backend/acl_cl/TensorBuilder.h index a0c0168..e9f2eab 
100644 --- a/runtimes/neurun/src/backend/acl_cl/TensorBuilder.h +++ b/runtimes/neurun/src/backend/acl_cl/TensorBuilder.h @@ -20,6 +20,7 @@ #include "backend/interface/ITensorBuilder.h" #include "backend/acl_cl/operand/CLTensor.h" #include "backend/acl_cl/operand/CLSubTensor.h" +#include "backend/acl_cl/operand/Object.h" #include @@ -81,6 +82,9 @@ private: std::unordered_map> _subtensors; + std::unordered_map> + _objects; }; } // namespace acl_cl diff --git a/runtimes/neurun/src/backend/cpu/StageGenerator.cc b/runtimes/neurun/src/backend/cpu/StageGenerator.cc index eaf23f4..078d4b8 100644 --- a/runtimes/neurun/src/backend/cpu/StageGenerator.cc +++ b/runtimes/neurun/src/backend/cpu/StageGenerator.cc @@ -582,12 +582,12 @@ void StageGenerator::visit(const graph::operation::PermuteNode &node) const auto output_tensors = output_backend->tensor_builder(); returnStage([input_tensors, output_tensors, param](IExecutionBuilder &builder) { - auto output_alloc = output_tensors->tensorAt(param.output_index).get(); - auto input_alloc = input_tensors->tensorAt(param.input_index).get(); + auto output_object = output_tensors->wrapTensor(param.output_index); + auto input_object = input_tensors->wrapTensor(param.input_index); auto fn = nnfw::cpp14::make_unique<::neurun::kernel::cpu::PermuteLayer>(); - fn->configure(input_alloc, output_alloc, param.shape, param.type); + fn->configure(input_object, output_object, param.shape, param.type); builder.append(std::move(fn)); }); diff --git a/runtimes/neurun/src/backend/cpu/TensorBuilder.cc b/runtimes/neurun/src/backend/cpu/TensorBuilder.cc index a9fb3c0..b211eba 100644 --- a/runtimes/neurun/src/backend/cpu/TensorBuilder.cc +++ b/runtimes/neurun/src/backend/cpu/TensorBuilder.cc @@ -96,7 +96,14 @@ TensorBuilder::tensorAt(const graph::operand::Index &ind) std::shared_ptr TensorBuilder::wrapTensor(const graph::operand::Index &ind) { - return std::make_shared(_tensors.at(ind)); + if (_objects.find(ind) != _objects.end()) + { + return 
_objects.at(ind); + } + else + { + return _objects[ind] = std::make_shared(_tensors.at(ind)); + } } void TensorBuilder::iterate(const IterateFunction &fn) diff --git a/runtimes/neurun/src/backend/cpu/TensorBuilder.h b/runtimes/neurun/src/backend/cpu/TensorBuilder.h index 98dd8ea..3e922ec 100644 --- a/runtimes/neurun/src/backend/cpu/TensorBuilder.h +++ b/runtimes/neurun/src/backend/cpu/TensorBuilder.h @@ -21,6 +21,7 @@ #include "backend/interface/ITensorBuilder.h" #include "backend/cpu/operand/Tensor.h" +#include "backend/cpu/operand/Object.h" #include "graph/operand/Index.h" #include "MemoryPlanner.h" @@ -68,6 +69,7 @@ public: private: std::unordered_map _tensor_info_map; std::unordered_map> _tensors; + std::unordered_map> _objects; std::unordered_map _tensor_mem_map; std::shared_ptr _mem_planner; std::shared_ptr _mem_alloc; diff --git a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc index f2219d9..e22814d 100644 --- a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc @@ -33,8 +33,8 @@ namespace kernel namespace cpu { -void PermuteLayer::configure(::neurun::backend::operand::ITensor *input, - ::neurun::backend::operand::ITensor *output, +void PermuteLayer::configure(std::shared_ptr<::neurun::backend::operand::IObject> input, + std::shared_ptr<::neurun::backend::operand::IObject> output, const graph::operand::Shape &shape, Type type) { _input = input; @@ -45,174 +45,182 @@ void PermuteLayer::configure(::neurun::backend::operand::ITensor *input, void PermuteLayer::run() { - auto input_buffer = _input->buffer(); - auto input_size = _input->total_size(); - - auto output_buffer = _output->buffer(); - auto output_size = _output->total_size(); - auto rank = _shape.rank(); switch (_type) { case Type::NHWC_TO_NCHW: { - // TODO Fix this workaround (We may need backend::operand::IObject instead of ITensor) - auto &queue = ::arm_compute::CLScheduler::get().queue(); - auto 
_output_cl = dynamic_cast<::neurun::backend::acl_cl::operand::ICLTensor *>(_output); - _output_cl->map(queue); - switch (rank) - { - case 0: - case 1: - { - memcpy(output_buffer, input_buffer, input_size); - break; - } - case 2: - { - using ::arm_compute::Window; - using ::arm_compute::Iterator; - - auto matrix_shape = _shape.asMatrix(); + auto fn = [&](::neurun::backend::operand::ITensor &tensor) { + auto input_tensor = _input->ptr(); - Window window; - window.use_tensor_dimensions(_output_cl->info()->tensor_shape(), Window::DimY); + auto input_buffer = input_tensor->buffer(); + auto input_size = input_tensor->total_size(); - Iterator it(_output_cl->handle(), window); + auto output_buffer = tensor.buffer(); + auto output_size = tensor.total_size(); - const auto &y = window[Window::DimY]; - for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY)) + auto output_cl = dynamic_cast<::neurun::backend::acl_cl::operand::ICLTensor *>(&tensor); + switch (rank) + { + case 0: + case 1: { - memcpy(it.ptr(), input_buffer + h * matrix_shape.W, matrix_shape.W * sizeof(float)); + memcpy(output_buffer, input_buffer, input_size); + break; } - break; - } - case 3: - { - using ::arm_compute::Window; - using ::arm_compute::Iterator; + case 2: + { + using ::arm_compute::Window; + using ::arm_compute::Iterator; - const int32_t height_width = _shape.dim(1) * _shape.dim(2); - const int32_t width = _shape.dim(2); + auto matrix_shape = _shape.asMatrix(); - Window window; - window.use_tensor_dimensions(_output_cl->info()->tensor_shape(), Window::DimY); + Window window; + window.use_tensor_dimensions(output_cl->info()->tensor_shape(), Window::DimY); - Iterator it(_output_cl->handle(), window); + Iterator it(output_cl->handle(), window); - const auto &z = window[Window::DimZ]; - const auto &y = window[Window::DimY]; - for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ)) - { + const auto &y = window[Window::DimY]; for (auto h = y.start(); h < 
y.end(); h += y.step(), it.increment(Window::DimY)) { - memcpy(it.ptr(), input_buffer + c * height_width + h * width, width * sizeof(float)); + memcpy(it.ptr(), input_buffer + h * matrix_shape.W, matrix_shape.W * sizeof(float)); } + break; } - break; - } - case 4: - { - auto feature = _shape.asFeature(); - - const util::feature::nhwc::Reader from{ - feature, reinterpret_cast(input_buffer), input_size}; - util::feature::nchw::View into{_output_cl}; - - ::nnfw::util::feature::iterate(feature) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, ch, row, col) = value; - }; - break; - } - default: - throw "NYI"; - break; - } - _output_cl->unmap(queue); // TODO Likewise above + case 3: + { + using ::arm_compute::Window; + using ::arm_compute::Iterator; + + const int32_t height_width = _shape.dim(1) * _shape.dim(2); + const int32_t width = _shape.dim(2); + + Window window; + window.use_tensor_dimensions(output_cl->info()->tensor_shape(), Window::DimY); + + Iterator it(output_cl->handle(), window); + const auto &z = window[Window::DimZ]; + const auto &y = window[Window::DimY]; + for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ)) + { + for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY)) + { + memcpy(it.ptr(), input_buffer + c * height_width + h * width, + width * sizeof(float)); + } + } + break; + } + case 4: + { + auto feature = _shape.asFeature(); + + const util::feature::nhwc::Reader from{ + feature, reinterpret_cast(input_buffer), input_size}; + util::feature::nchw::View into{output_cl}; + + ::nnfw::util::feature::iterate(feature) + << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, ch, row, col); + into.at(batch, ch, row, col) = value; + }; + break; + } + default: + throw "NYI"; + break; + } + }; + _output->access(fn); break; } case Type::NCHW_TO_NHWC: { - // TODO Fix this 
workaround (We may need backend::operand::IObject instead of ITensor) - auto &queue = ::arm_compute::CLScheduler::get().queue(); - auto _input_cl = dynamic_cast<::neurun::backend::acl_cl::operand::ICLTensor *>(_input); - _input_cl->map(queue); - switch (rank) - { - case 0: - case 1: - { - memcpy(output_buffer, input_buffer, output_size); - break; - } - case 2: - { - using ::arm_compute::Window; - using ::arm_compute::Iterator; - - Window window; - window.use_tensor_dimensions(_input_cl->info()->tensor_shape(), Window::DimY); + auto fn = [&](::neurun::backend::operand::ITensor &tensor) { + auto input_buffer = tensor.buffer(); + auto input_size = tensor.total_size(); - Iterator it(_input_cl->handle(), window); + auto output_tensor = _output->ptr(); - int output_width = _shape.asMatrix().W; + auto output_buffer = output_tensor->buffer(); + auto output_size = output_tensor->total_size(); - const auto &y = window[Window::DimY]; - for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY)) + auto input_cl = dynamic_cast<::neurun::backend::acl_cl::operand::ICLTensor *>(&tensor); + switch (rank) + { + case 0: + case 1: { - memcpy(output_buffer + h * output_width, it.ptr(), output_width * sizeof(float)); + memcpy(output_buffer, input_buffer, output_size); + break; } - break; - } - case 3: - { - using ::arm_compute::Window; - using ::arm_compute::Iterator; + case 2: + { + using ::arm_compute::Window; + using ::arm_compute::Iterator; - const int32_t height_width = _shape.dim(1) * _shape.dim(2); - const int32_t width = _shape.dim(2); + Window window; + window.use_tensor_dimensions(input_cl->info()->tensor_shape(), Window::DimY); - Window window; - window.use_tensor_dimensions(_input_cl->info()->tensor_shape(), Window::DimY); + Iterator it(input_cl->handle(), window); - Iterator it(_input_cl->handle(), window); + int output_width = _shape.asMatrix().W; - const auto &z = window[Window::DimZ]; - const auto &y = window[Window::DimY]; - for (auto c = z.start(); 
c < z.end(); c += z.step(), it.increment(Window::DimZ)) - { + const auto &y = window[Window::DimY]; for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY)) { - memcpy(output_buffer + c * height_width + h * width, it.ptr(), width * sizeof(float)); + memcpy(output_buffer + h * output_width, it.ptr(), output_width * sizeof(float)); } + break; } - break; - } - case 4: - { - auto feature = _shape.asFeature(); - - const util::feature::nchw::View from{_input_cl}; - util::feature::nhwc::View into{feature, reinterpret_cast(output_buffer), - output_size}; - - ::nnfw::util::feature::iterate(feature) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, ch, row, col) = value; - }; - break; - } - default: - throw "NYI"; - break; - } - _input_cl->unmap(queue); // TODO Likewise above + case 3: + { + using ::arm_compute::Window; + using ::arm_compute::Iterator; + + const int32_t height_width = _shape.dim(1) * _shape.dim(2); + const int32_t width = _shape.dim(2); + Window window; + window.use_tensor_dimensions(input_cl->info()->tensor_shape(), Window::DimY); + + Iterator it(input_cl->handle(), window); + + const auto &z = window[Window::DimZ]; + const auto &y = window[Window::DimY]; + for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ)) + { + for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY)) + { + memcpy(output_buffer + c * height_width + h * width, it.ptr(), + width * sizeof(float)); + } + } + break; + } + case 4: + { + auto feature = _shape.asFeature(); + + const util::feature::nchw::View from{input_cl}; + util::feature::nhwc::View into{feature, reinterpret_cast(output_buffer), + output_size}; + + ::nnfw::util::feature::iterate(feature) + << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, ch, row, col); + into.at(batch, ch, row, col) = value; + }; + break; + } + 
default: + throw "NYI"; + break; + } + }; + _input->access(fn); break; } case Type::COPY: diff --git a/runtimes/neurun/src/kernel/cpu/PermuteLayer.h b/runtimes/neurun/src/kernel/cpu/PermuteLayer.h index 39b0a71..abd5cf5 100644 --- a/runtimes/neurun/src/kernel/cpu/PermuteLayer.h +++ b/runtimes/neurun/src/kernel/cpu/PermuteLayer.h @@ -23,7 +23,7 @@ #include "util/feature/nhwc/View.h" #include "OperationUtils.h" -#include "backend/interface/operand/ITensor.h" +#include "backend/interface/operand/IObject.h" namespace neurun { @@ -46,14 +46,14 @@ public: PermuteLayer() = default; public: - void configure(::neurun::backend::operand::ITensor *input, - ::neurun::backend::operand::ITensor *output, const graph::operand::Shape &shape, - Type type); + void configure(std::shared_ptr<::neurun::backend::operand::IObject> input, + std::shared_ptr<::neurun::backend::operand::IObject> output, + const graph::operand::Shape &shape, Type type); void run(); private: - ::neurun::backend::operand::ITensor *_input; - ::neurun::backend::operand::ITensor *_output; + std::shared_ptr<::neurun::backend::operand::IObject> _input; + std::shared_ptr<::neurun::backend::operand::IObject> _output; graph::operand::Shape _shape; Type _type; }; -- 2.7.4