#include <cassert>
#include <arm_compute/core/ITensor.h>
+#include "nnfw/std/memory.h"
+#include "kernel/cpu/PermuteLayer.h"
+#include "backend/cpu/operand/Tensor.h"
+#include "util/feature/nhwc/View.h"
+#include "util/feature/nchw/View.h"
+#include <util/feature/IndexIterator.h>
+#include <cstring>   // memcpy
+#include <stdexcept> // std::runtime_error in the default (NYI) case
namespace neurun
{
const size_t _size;
};
+class PermutateSink final : public ISink
+{
+public:
+ PermutateSink(neurun::backend::cpu::operand::Tensor output, const graph::operand::Shape &shape)
+ : _output{output}, _shape{shape}
+ {
+ }
+
+public:
+ void pull(::arm_compute::ITensor &tensor) const override
+ {
+ // do NCHW_TO_NHWC permutation
+ auto input_buffer = tensor.buffer();
+
+ auto output_buffer = _output.buffer();
+ auto output_size = _output.info()->total_size();
+ auto rank = _shape.rank();
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
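+ // Rank 0/1: NCHW and NHWC coincide, so a raw copy of the whole buffer is
+ // enough (this assumes the ACL tensor carries no padding for these ranks).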
+ memcpy(output_buffer, input_buffer, output_size);
+ break;
+ }
+ case 2:
+ {
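+ // Rank 2: no reordering is needed, but the ACL tensor may pad each row,
+ // so copy row by row through a Window/Iterator rather than in one memcpy.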
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape(), Window::DimY);
+
+ Iterator it(&tensor, window);
+
+ const size_t element_size = tensor.info()->element_size();
+ const int32_t output_width = _shape.asMatrix().W;
+
+ const auto &y = window[Window::DimY];
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ // Offsets and lengths are in bytes; sizeof(output_buffer) would only give
+ // the size of the pointer, not of an element.
+ memcpy(output_buffer + h * output_width * element_size, it.ptr(),
+ output_width * element_size);
+ }
+ break;
+ }
+ case 3:
+ {
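+ // Rank 3: likewise only a row-wise copy; the (c, h) loops walk the ACL
+ // tensor while the destination offset is computed from the dense shape.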
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ const size_t element_size = tensor.info()->element_size();
+ const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+ const int32_t width = _shape.dim(2);
+
+ Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape(), Window::DimY);
+
+ Iterator it(&tensor, window);
+
+ const auto &z = window[Window::DimZ];
+ const auto &y = window[Window::DimY];
+ for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+ {
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ memcpy(output_buffer + (c * height_width + h * width) * element_size, it.ptr(),
+ width * element_size);
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ auto feature = _shape.asFeature();
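+ // nchw::View::at(n, c, h, w) reads offset ((n*C + c)*H + h)*W + w while
+ // nhwc::View::at writes ((n*H + h)*W + w)*C + c, i.e. the element-wise
+ // NCHW -> NHWC permutation.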
+
+ // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
+ const util::feature::nchw::View<float> from{&tensor};
+ util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer),
+ output_size};
+
+ ::nnfw::util::feature::iterate(feature)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
+ }
+ default:
+ throw std::runtime_error{"NYI"};
+ }
+ }
+
+private:
+ const neurun::backend::cpu::operand::Tensor _output;
+ const graph::operand::Shape _shape;
+};
+
} // namespace exec
} // namespace neurun
#include <cassert>
#include <arm_compute/core/ITensor.h>
+#include "kernel/cpu/PermuteLayer.h"
+#include "nnfw/std/memory.h"
+#include "backend/cpu/operand/Tensor.h"
+#include "util/feature/nchw/View.h"
+#include "util/feature/nhwc/Reader.h"
+#include <util/feature/IndexIterator.h>
+#include <cstring>   // memcpy
+#include <stdexcept> // std::runtime_error in the default (NYI) case
namespace neurun
{
const size_t _size;
};
+class PermutateSource final : public ISource
+{
+public:
+ PermutateSource(neurun::backend::cpu::operand::Tensor input, const graph::operand::Shape &shape)
+ : _input{input}, _shape{shape}
+ {
+ }
+
+public:
+ void push(::arm_compute::ITensor &tensor) const override
+ {
+ // do NHWC_TO_NCHW permutation
+ auto input_buffer = _input.buffer();
+ auto input_size = _input.info()->total_size();
+
+ auto output_buffer = tensor.buffer();
+ auto rank = _shape.rank();
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
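+ // Rank 0/1: both layouts are identical, so copy the buffer as-is.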
+ memcpy(output_buffer, input_buffer, input_size);
+ break;
+ }
+ case 2:
+ {
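+ // Rank 2: copy row by row into the ACL tensor so its row padding (if any)
+ // is left untouched.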
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ auto matrix_shape = _shape.asMatrix();
+
+ Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape(), Window::DimY);
+
+ Iterator it(&tensor, window);
+
+ const size_t element_size = tensor.info()->element_size();
+
+ const auto &y = window[Window::DimY];
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ // Scale by the element size: sizeof(input_buffer) was the pointer size,
+ // not the size of an element.
+ memcpy(it.ptr(), input_buffer + h * matrix_shape.W * element_size,
+ matrix_shape.W * element_size);
+ }
+ break;
+ }
+ case 3:
+ {
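+ // Rank 3: same row-wise copy as above, with the source offset computed
+ // from the dense NHWC user buffer.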
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ const size_t element_size = tensor.info()->element_size();
+ const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+ const int32_t width = _shape.dim(2);
+
+ Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape(), Window::DimY);
+
+ Iterator it(&tensor, window);
+
+ const auto &z = window[Window::DimZ];
+ const auto &y = window[Window::DimY];
+ for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+ {
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ memcpy(it.ptr(), input_buffer + (c * height_width + h * width) * element_size,
+ width * element_size);
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ auto feature = _shape.asFeature();
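+ // nhwc::Reader::at(n, c, h, w) reads offset ((n*H + h)*W + w)*C + c and
+ // nchw::View::at writes ((n*C + c)*H + h)*W + w: the NHWC -> NCHW permutation.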
+
+ const util::feature::nhwc::Reader<float> from{
+ feature, reinterpret_cast<const float *>(input_buffer), input_size};
+ util::feature::nchw::View<float> into{&tensor};
+
+ // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
+ ::nnfw::util::feature::iterate(feature)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
+ }
+ default:
+ throw std::runtime_error{"NYI"};
+ }
+ }
+
+private:
+ const neurun::backend::cpu::operand::Tensor _input;
+ const graph::operand::Shape _shape;
+};
+
} // namespace exec
} // namespace neurun
#include "graph/operand/DataType.h"
#include "graph/operand/Index.h"
+#include "kernel/cpu/PermuteLayer.h"
+#include "backend/cpu/operand/Tensor.h"
+#include "internal/Convert.h"
+#include "graph/operand/Layout.h"
+#include "backend/BackendManager.h"
+#include "backend/interface/IConfig.h"
+#include "compiler/BackendResolver.h"
inline void source(ANeuralNetworksExecution *execution,
const ::neurun::graph::operand::DataType &type, int32_t index,
const void *buffer, size_t length)
{
+ const auto &operands = execution->plan().model().operands();
+ neurun::graph::operand::IO::Index input_index{index};
+
+ const auto operand_index = execution->plan().model().getInputs().at(input_index);
+ auto operand = &operands.at(operand_index);
+ auto operand_li = operand->lower_info();
+ const auto output_backend = operand_li->def_backends().getOnlyElement();
+ const auto output_layout = output_backend->config()->getOperandLayout();
+ auto input_layout = execution->plan()
+ .model()
+ .backend_resolver()
+ ->getDefaultBackend()
+ ->config()
+ ->getOperandLayout();
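+ // Permute while copying in: the user side (default backend) hands us NHWC
+ // data, but the backend that owns this input operand expects NCHW.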
+ if (input_layout == neurun::graph::operand::Layout::NHWC &&
+ output_layout == neurun::graph::operand::Layout::NCHW)
+ {
+ const auto tensor_info = ::internal::asTensorInfo(operand->shape(), operand->typeInfo());
+ auto tensor_from_interp = neurun::backend::cpu::operand::Tensor(tensor_info);
+ // const_cast is safe here: PermutateSource only ever reads this buffer
+ tensor_from_interp.setBuffer(static_cast<uint8_t *>(const_cast<void *>(buffer)));
+
+ execution->source<::neurun::exec::PermutateSource>(index, tensor_from_interp, operand->shape());
+ return;
+ }
using ::neurun::graph::operand::DataType;
switch (type)
{
const ::neurun::graph::operand::DataType &type, int32_t index, void *buffer,
size_t length)
{
+ const auto &operands = execution->plan().model().operands();
+ neurun::graph::operand::IO::Index output_index{index};
+
+ const auto operand_index = execution->plan().model().getOutputs().at(output_index);
+ auto operand = &operands.at(operand_index);
+ auto operand_li = operand->lower_info();
+ const auto input_backend = operand_li->def_backends().getOnlyElement();
+ const auto input_layout = input_backend->config()->getOperandLayout();
+ auto output_layout = execution->plan()
+ .model()
+ .backend_resolver()
+ ->getDefaultBackend()
+ ->config()
+ ->getOperandLayout();
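+ // Permute while copying out: the backend that defines this output works in
+ // NCHW, but the user side (default backend) expects NHWC.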
+ if (input_layout == neurun::graph::operand::Layout::NCHW &&
+ output_layout == neurun::graph::operand::Layout::NHWC)
+ {
+ const auto tensor_info = ::internal::asTensorInfo(operand->shape(), operand->typeInfo());
+ auto tensor_from_interp = neurun::backend::cpu::operand::Tensor(tensor_info);
+ tensor_from_interp.setBuffer(static_cast<uint8_t *>(buffer));
+
+ execution->sink<::neurun::exec::PermutateSink>(index, tensor_from_interp, operand->shape());
+ return;
+ }
using ::neurun::graph::operand::DataType;
switch (type)
{
#include "backend/interface/IConfig.h"
#include "operation/PermuteNode.h"
#include "pass/PermutationInsertionPass.h"
+#include "pass/PermutationEliminationPass.h"
namespace neurun
{
{
pass::PermutationInsertionPass pi_pass(*this);
pi_pass.run();
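+ // The insertion pass may create Permute nodes that turn out to be
+ // redundant; run the elimination pass to drop them again.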
+ pass::PermutationEliminationPass pe_pass(*this);
+ pe_pass.run();
}
// Graph verifications for the LOWERED phase