#include <util/tensor/IndexIterator.h>
#include "internal/Source.h"
+#include "internal/Swizzle.h"
#include "internal/nnapi/tensor/Reader.h"
#include "internal/arm_compute/tensor/View.h"
+// NOTE TensorSource is much slower than specialized Source(s): it copies the tensor
+//      element by element and remaps every index axis through ToARMComputeAxis.
template <typename T> class TensorSource final : public Source
{
public:
::internal::arm_compute::tensor::View<T> into{&tensor};
::nnfw::util::tensor::iterate(_shape) << [&](const nnfw::util::tensor::Index &index_nnapi) {
- const auto value = from.at(index_nnapi);
+ const auto rank = index_nnapi.rank();
+ nnfw::util::tensor::Index index_ACL(rank);
- nnfw::util::tensor::Index index_ACL = nnfw::util::tensor::copy_reverse(index_nnapi);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ index_ACL.at(ToARMComputeAxis(rank, axis).value()) = index_nnapi.at(axis);
+ }
- into.at(index_ACL) = value;
+ into.at(index_ACL) = from.at(index_nnapi);
};
}