// A 32-bit signed value of -1 converted to unsigned becomes 4294967295, so the unsigned min() below also clamps a negative z_coord to SRC_DIM_2
z_coord = z * (int)CONV_STRIDE_Y - (int)CONV_PAD_TOP;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
-
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
// Moreover z_coord cannot be out-of-bound for z = 1 so we do not need to clamp the offset
z_coord = z * (int)CONV_STRIDE_Y - (int)CONV_PAD_TOP + DILATION_Y;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values3 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
// Offset can be out-of-bound so we need to check if it is greater than max_offset
z_coord = z * (int)CONV_STRIDE_Y - (int)CONV_PAD_TOP + DILATION_Y * 2;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values6 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
// A 32-bit signed value of -1 converted to unsigned becomes 4294967295, so the unsigned min() below also clamps a negative z_coord to SRC_DIM_2
z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
-
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
// z == 1
z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP + 1;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values4 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
// z == 2
z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP + 2;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values8 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
// z == 3
z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP + 3;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values12 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
// |__________________|
// | pad_bottom |
// |******************|
- const int max_offset = _input->info()->strides_in_bytes().z() * _input->info()->dimension(2) - (_input->info()->padding().bottom + _input->info()->padding().top) *
- _input->info()->strides_in_bytes().y();
+ const int max_offset = ((_input->info()->dimension(1) * _input->info()->dimension(2)) + (_input->info()->padding().bottom + _input->info()->padding().top) * (_input->info()->dimension(
+ 2) - 1)) * _input->info()->strides_in_bytes().y();
_kernel.setArg(idx, max_offset);
}
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y ? 3u : 2u;
strides.set(i, shape[i - 1] * strides[i - 1]);
}
- size_t first_zero = std::distance(strides.begin(), std::find_if(strides.begin(), strides.end(), [](uint32_t val)
- {
- return val == 0U;
- }));
-
- if(first_zero > 0)
- {
- if(first_zero == 1)
- {
- strides.set(1, strides[0] * (shape[0] + info.padding().left + info.padding().right));
- ++first_zero;
- }
- else if(first_zero == 2)
- {
- strides.set(2, strides[1] * (shape[1] + info.padding().top + info.padding().bottom));
- ++first_zero;
- }
-
- for(size_t i = first_zero; i < Strides::num_max_dimensions; ++i)
- {
- strides.set(i, strides[first_zero - 1]);
- }
- }
-
return strides;
}
PaddingSize{ 4, 36, 4, 4 }})),
framework::dataset::make("Strides", {
Strides{},
- Strides{ 1U, 50U, 50U, 50U, 50U, 50U },
- Strides{ 1U, 50U, 900U, 900U, 900U, 900U },
- Strides{ 1U, 50U, 900U, 900U, 900U, 900U },
- Strides{ 1U, 50U, 900U, 9000U, 9000U, 9000U },
- Strides{ 1U, 50U, 900U, 9000U, 90000U, 90000U },
+ Strides{ 1U, 50U },
+ Strides{ 1U, 50U },
+ Strides{ 1U, 50U, 900U },
+ Strides{ 1U, 50U, 900U, 9000U },
+ Strides{ 1U, 50U, 900U, 9000U, 90000U },
Strides{ 1U, 50U, 900U, 9000U, 90000U, 900000U }})),
framework::dataset::make("Offset", { 0U, 4U, 204U, 204U, 204U, 204U, 204U })),
shape, auto_padding, strides, offset)