This commit fixes a padding bug in some ACL kernels that use Coordinates.
Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
#include <arm_compute/runtime/CL/CLScheduler.h>
void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
- int32_t block_size,
- const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+ int32_t block_size, const ::arm_compute::Coordinates &axises)
{
const auto rank = axises.num_dimensions();
assert(rank == 4);
_axises = axises;
}
-inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
- int32_t d, const ::arm_compute::Coordinates &axises)
-{
- // b, h, w, d >= 0
- size_t indexes[4];
- indexes[axises[0]] = b;
- indexes[axises[1]] = h;
- indexes[axises[2]] = w;
- indexes[axises[3]] = d;
-
- int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
- offset += indexes[2] * shape[1] * shape[0];
- offset += indexes[1] * shape[0];
- offset += indexes[0];
- return offset;
-}
-
template <typename T>
-inline void DepthToSpace(const T *input_data, const ::arm_compute::TensorShape &input_shape,
- int32_t block_size, T *output_data,
+inline void DepthToSpace(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape, int32_t block_size,
+ ::arm_compute::ITensor *output,
const ::arm_compute::TensorShape &output_shape,
const ::arm_compute::Coordinates &axises)
{
const int in_d =
out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth;
- const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises);
- const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+ auto input_id =
+ asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
- output_data[output_index] = input_data[input_index];
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
}
}
}
CAST_CL(_output)->map(q);
}
- auto input_buf = _input->buffer();
- auto output_buf = _output->buffer();
switch (_input->info()->data_type())
{
case ::arm_compute::DataType::U8:
case ::arm_compute::DataType::QASYMM8:
- DepthToSpace(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<uint8_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ DepthToSpace<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ case ::arm_compute::DataType::S8:
+ DepthToSpace<int8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ case ::arm_compute::DataType::U32:
+ DepthToSpace<uint32_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ case ::arm_compute::DataType::S32:
+ DepthToSpace<int32_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
break;
case ::arm_compute::DataType::F32:
- DepthToSpace(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<float *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ DepthToSpace<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
break;
default:
ARM_COMPUTE_ERROR("DataType not supported");
#define __SIMPLE_DEPTH_TO_SPACE_H__
#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
#include <arm_compute/core/ITensor.h>
#include <arm_compute/runtime/IFunction.h>
* @param[in] block_size Block size.
*/
void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
- const ::arm_compute::Coordinates &axises);
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
void run() override;
*/
#include "internal/layers/SimpleSpaceToBatchND.h"
+#include "internal/arm_compute/Cast.h"
#include <arm_compute/runtime/CL/CLScheduler.h>
void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
const int32_t *block_size, const int32_t *padding_size,
- const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+ const ::arm_compute::Coordinates &axises)
{
const auto rank = axises.num_dimensions();
assert(rank == 4);
_axises = axises;
}
-inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
- int32_t d, const ::arm_compute::Coordinates &axises)
-{
- // b, h, w, d >= 0
- size_t indexes[4];
- indexes[axises[0]] = b;
- indexes[axises[1]] = h;
- indexes[axises[2]] = w;
- indexes[axises[3]] = d;
-
- int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
- offset += indexes[2] * shape[1] * shape[0];
- offset += indexes[1] * shape[0];
- offset += indexes[0];
- return offset;
-}
-
template <typename T>
-inline void SpaceToBatchND(const T *input_data, const ::arm_compute::TensorShape &input_shape,
- const int32_t *block_size_data, const int32_t *padding_size_data,
- T *output_data, const ::arm_compute::TensorShape &output_shape,
- const ::arm_compute::Coordinates &axises)
+inline void
+SpaceToBatchND(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape,
+ const int32_t *block_size_data, const int32_t *padding_size_data,
+ const ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape,
+ const ::arm_compute::Coordinates &axises)
{
const int input_batch = input_shape[axises[0]];
const int input_height = input_shape[axises[1]];
((in_h % block_size_data[0]) * block_size_data[1] + in_w % block_size_data[1]) *
input_batch;
- const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
if (in_h < padding_size_data[0] || in_h >= (input_height + padding_size_data[0]) ||
in_w < padding_size_data[2] || in_w >= (input_width + padding_size_data[2]))
{
- output_data[output_index] = 0;
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = 0;
}
else
{
- const int input_index = Offset4D(input_shape, in_b, in_h - padding_size_data[0],
- in_w - padding_size_data[2], in_d, axises);
- output_data[output_index] = input_data[input_index];
+ auto input_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{in_b, in_h - padding_size_data[0],
+ in_w - padding_size_data[2], in_d},
+ axises);
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
}
}
}
{
case ::arm_compute::DataType::U8:
case ::arm_compute::DataType::QASYMM8:
- SpaceToBatchND(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, _padding_size, reinterpret_cast<uint8_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ SpaceToBatchND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _padding_size,
+ _output, _output->info()->tensor_shape(), _axises);
break;
case ::arm_compute::DataType::F32:
- SpaceToBatchND(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
- _block_size, _padding_size, reinterpret_cast<float *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ SpaceToBatchND<float>(_input, _input->info()->tensor_shape(), _block_size, _padding_size,
+ _output, _output->info()->tensor_shape(), _axises);
break;
default:
ARM_COMPUTE_ERROR("DataType not supported");
#include <arm_compute/runtime/CL/CLScheduler.h>
void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
- int32_t block_size,
- const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+ int32_t block_size, const ::arm_compute::Coordinates &axises)
{
const auto rank = axises.num_dimensions();
assert(rank == 4);
_axises = axises;
}
-inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
- int32_t d, const ::arm_compute::Coordinates &axises)
-{
- // b, h, w, d >= 0
- size_t indexes[4];
- indexes[axises[0]] = b;
- indexes[axises[1]] = h;
- indexes[axises[2]] = w;
- indexes[axises[3]] = d;
-
- int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
- offset += indexes[2] * shape[1] * shape[0];
- offset += indexes[1] * shape[0];
- offset += indexes[0];
- return offset;
-}
-
template <typename T>
-inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &input_shape,
- int32_t block_size, T *output_data,
+inline void SpaceToDepth(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape, int32_t block_size,
+ ::arm_compute::ITensor *output,
const ::arm_compute::TensorShape &output_shape,
const ::arm_compute::Coordinates &axises)
{
const int out_d =
in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth;
- const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises);
- const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+ auto input_id =
+ asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
- output_data[output_index] = input_data[input_index];
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
}
}
}
{
case ::arm_compute::DataType::U8:
case ::arm_compute::DataType::QASYMM8:
- SpaceToDepth(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<uint8_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ SpaceToDepth<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
break;
case ::arm_compute::DataType::F32:
- SpaceToDepth(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<float *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ SpaceToDepth<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
break;
default:
ARM_COMPUTE_ERROR("DataType not supported");
#define __SIMPLE_SPACE_TO_DEPTH_H__
#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
#include <arm_compute/core/ITensor.h>
#include <arm_compute/runtime/IFunction.h>
* @param[in] block_size Block size.
*/
void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
- const ::arm_compute::Coordinates &axises);
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
void run() override;