auto output_buffer = _output->buffer();
auto output_size = _output->info()->total_size();
- assert(_shape.rank() == 4);
- auto feature = _shape.asFeature();
+ auto rank = _shape.rank();
switch (_type)
{
case Type::NHWC_TO_NCHW:
{
- const util::feature::nhwc::Reader<float> from{
- feature, reinterpret_cast<const float *>(input_buffer), input_size};
- util::feature::nchw::View<float> into{_output};
-
- // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
auto &queue = ::arm_compute::CLScheduler::get().queue();
auto _output_cl = dynamic_cast<::arm_compute::ICLTensor *>(_output);
_output_cl->map(queue);
-
- ::nnfw::util::feature::iterate(feature)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, ch, row, col) = value;
- };
-
+ // Dispatch on rank: 0/1-D is a flat byte copy, 2/3-D copies row-by-row
+ // through an ACL window on the output, 4-D uses the feature Reader/View.
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
+ // Layout conversion is the identity below rank 2, so copy raw bytes.
+ memcpy(output_buffer, input_buffer, input_size);
+ break;
+ }
+ case 2:
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ auto matrix_shape = _shape.asMatrix();
+ // Bytes per element: the buffers are byte pointers and memcpy counts
+ // bytes. (sizeof(input_buffer) would be the size of the *pointer*,
+ // not of a row of data.)
+ const size_t element_size = _output->info()->element_size();
+
+ Window window;
+ window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY);
+
+ Iterator it(_output, window);
+
+ const auto &y = window[Window::DimY];
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ // Copy one row; the source offset must be scaled to bytes as well.
+ memcpy(it.ptr(), input_buffer + h * matrix_shape.W * element_size,
+ matrix_shape.W * element_size);
+ }
+ break;
+ }
+ case 3:
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+ const int32_t width = _shape.dim(2);
+ // Bytes per element (see the rank-2 case above).
+ const size_t element_size = _output->info()->element_size();
+
+ Window window;
+ window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY);
+
+ Iterator it(_output, window);
+
+ const auto &z = window[Window::DimZ];
+ const auto &y = window[Window::DimY];
+ for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+ {
+ // increment(DimZ) resets the lower (Y) offset for the next plane.
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ memcpy(it.ptr(), input_buffer + (c * height_width + h * width) * element_size,
+ width * element_size);
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ auto feature = _shape.asFeature();
+
+ const util::feature::nhwc::Reader<float> from{
+ feature, reinterpret_cast<const float *>(input_buffer), input_size};
+ util::feature::nchw::View<float> into{_output};
+
+ // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
+ ::nnfw::util::feature::iterate(feature)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
+ }
+ default:
+ throw "NYI";
+ break;
+ }
_output_cl->unmap(queue); // TODO Likewise above
break;
}
case Type::NCHW_TO_NHWC:
{
- // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
auto &queue = ::arm_compute::CLScheduler::get().queue();
auto _input_cl = dynamic_cast<::arm_compute::ICLTensor *>(_input);
_input_cl->map(queue);
-
- const util::feature::nchw::View<float> from{_input};
- util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer),
- output_size};
-
- ::nnfw::util::feature::iterate(feature)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, ch, row, col) = value;
- };
-
+ // Dispatch on rank: 0/1-D is a flat byte copy, 2/3-D reads row-by-row
+ // through an ACL window on the input, 4-D uses the feature View pair.
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
+ // Layout conversion is the identity below rank 2, so copy raw bytes.
+ memcpy(output_buffer, input_buffer, output_size);
+ break;
+ }
+ case 2:
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ Window window;
+ window.use_tensor_dimensions(_input->info()->tensor_shape(), Window::DimY);
+
+ Iterator it(_input, window);
+
+ int output_width = _shape.asMatrix().W;
+ // Bytes per element: memcpy counts and buffer offsets are in bytes.
+ // (sizeof(output_buffer) would be the size of the *pointer*, not of
+ // a row of data.)
+ const size_t element_size = _input->info()->element_size();
+
+ const auto &y = window[Window::DimY];
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ // Copy one row; the destination offset must be scaled to bytes too.
+ memcpy(output_buffer + h * output_width * element_size, it.ptr(),
+ output_width * element_size);
+ }
+ break;
+ }
+ case 3:
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+ const int32_t width = _shape.dim(2);
+ // Bytes per element (see the rank-2 case above).
+ const size_t element_size = _input->info()->element_size();
+
+ Window window;
+ window.use_tensor_dimensions(_input->info()->tensor_shape(), Window::DimY);
+
+ Iterator it(_input, window);
+
+ const auto &z = window[Window::DimZ];
+ const auto &y = window[Window::DimY];
+ for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+ {
+ // increment(DimZ) resets the lower (Y) offset for the next plane.
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ memcpy(output_buffer + (c * height_width + h * width) * element_size, it.ptr(),
+ width * element_size);
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ auto feature = _shape.asFeature();
+
+ // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
+ const util::feature::nchw::View<float> from{_input};
+ util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer),
+ output_size};
+
+ ::nnfw::util::feature::iterate(feature)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
+ }
+ default:
+ throw "NYI";
+ break;
+ }
_input_cl->unmap(queue); // TODO Likewise above
break;