From: 김수진/동작제어Lab(SR)/Engineer/삼성전자 Date: Tue, 16 Oct 2018 09:58:09 +0000 (+0900) Subject: [neurun] Support to permute for all other dimensions (#3187) X-Git-Tag: 0.3~616 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3e148fb564f6ced3f3ff06764fe19b8bda55e4c6;p=platform%2Fcore%2Fml%2Fnnfw.git [neurun] Support to permute for all other dimensions (#3187) Related : #3106, #2874 Part of : #3178 PR This commit supports to permute for all other dimensions. Signed-off-by: sjsujinkim --- diff --git a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc index 3ffaa2a..e63b445 100644 --- a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc @@ -50,49 +50,170 @@ void PermuteLayer::run() auto output_buffer = _output->buffer(); auto output_size = _output->info()->total_size(); - assert(_shape.rank() == 4); - auto feature = _shape.asFeature(); + auto rank = _shape.rank(); switch (_type) { case Type::NHWC_TO_NCHW: { - const util::feature::nhwc::Reader from{ - feature, reinterpret_cast(input_buffer), input_size}; - util::feature::nchw::View into{_output}; - - // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor) auto &queue = ::arm_compute::CLScheduler::get().queue(); auto _output_cl = dynamic_cast<::arm_compute::ICLTensor *>(_output); _output_cl->map(queue); - - ::nnfw::util::feature::iterate(feature) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, ch, row, col) = value; - }; - + switch (rank) + { + case 0: + case 1: + { + memcpy(output_buffer, input_buffer, input_size); + break; + } + case 2: + { + using ::arm_compute::Window; + using ::arm_compute::Iterator; + + auto matrix_shape = _shape.asMatrix(); + + Window window; + window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY); + + Iterator it(_output, window); + + const auto &y = window[Window::DimY]; + for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY)) + { + memcpy(it.ptr(), input_buffer + h * matrix_shape.W, + matrix_shape.W * sizeof(input_buffer)); + } + break; + } + case 3: + { + using ::arm_compute::Window; + using ::arm_compute::Iterator; + + const int32_t height_width = _shape.dim(1) * _shape.dim(2); + const int32_t width = _shape.dim(2); + + Window window; + window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY); + + Iterator it(_output, window); + + const auto &z = window[Window::DimZ]; + const auto &y = window[Window::DimY]; + for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ)) + { + for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY)) + { + memcpy(it.ptr(), input_buffer + c * height_width + h * width, + width * sizeof(input_buffer)); + } + } + break; + } + case 4: + { + auto feature = _shape.asFeature(); + + const util::feature::nhwc::Reader from{ + feature, reinterpret_cast(input_buffer), input_size}; + util::feature::nchw::View into{_output}; + + // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor) + ::nnfw::util::feature::iterate(feature) + << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, ch, row, col); + into.at(batch, ch, row, col) = value; + }; + break; + } + default: + throw "NYI"; + break; + } _output_cl->unmap(queue); // TODO Likewise above break; } case Type::NCHW_TO_NHWC: { - // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor) auto &queue = ::arm_compute::CLScheduler::get().queue(); auto _input_cl = dynamic_cast<::arm_compute::ICLTensor *>(_input); _input_cl->map(queue); - - const util::feature::nchw::View from{_input}; - util::feature::nhwc::View into{feature, reinterpret_cast(output_buffer), - output_size}; - - ::nnfw::util::feature::iterate(feature) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, ch, row, col) = value; - }; - + switch (rank) + { + case 0: + case 1: + { + memcpy(output_buffer, input_buffer, output_size); + break; + } + case 2: + { + using ::arm_compute::Window; + using ::arm_compute::Iterator; + + Window window; + window.use_tensor_dimensions(_input->info()->tensor_shape(), Window::DimY); + + Iterator it(_input, window); + + int output_width = _shape.asMatrix().W; + + const auto &y = window[Window::DimY]; + for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY)) + { + memcpy(output_buffer + h * output_width, it.ptr(), + output_width * sizeof(output_buffer)); + } + break; + } + case 3: + { + using ::arm_compute::Window; + using ::arm_compute::Iterator; + + const int32_t height_width = _shape.dim(1) * _shape.dim(2); + const int32_t width = _shape.dim(2); + + Window window; + window.use_tensor_dimensions(_input->info()->tensor_shape(), Window::DimY); + + Iterator it(_input, window); + + const auto &z = window[Window::DimZ]; + const auto &y = window[Window::DimY]; + for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ)) + { + for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY)) + { + memcpy(output_buffer + c * height_width + h * width, it.ptr(), + width * sizeof(output_buffer)); + } + } + break; + } + case 4: + { + auto feature = _shape.asFeature(); + + // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor) + const util::feature::nchw::View from{_input}; + util::feature::nhwc::View into{feature, reinterpret_cast(output_buffer), + output_size}; + + ::nnfw::util::feature::iterate(feature) + << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, ch, row, col); + into.at(batch, ch, row, col) = value; + }; + break; + } + default: + throw "NYI"; + break; + } _input_cl->unmap(queue); // TODO Likewise above break;