*/
#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h"
-#include "arm_compute/core/AccessWindowAutoPadding.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/IMultiImage.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/MultiImageInfo.h"
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane1, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8);
- const Format fmt = output->info()->format();
+ const Format &output_format = output->info()->format();
- if(Format::RGBA8888 == fmt)
+ if(output_format == Format::RGBA8888)
{
ARM_COMPUTE_ERROR_ON(plane3 == output);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane3, 1, DataType::U8);
_num_elems_processed_per_iteration = 8;
_is_parallelizable = true;
- switch(fmt)
+ switch(output_format)
{
case Format::RGB888:
_func = &NEChannelCombineKernel::combine_3C;
_func = &NEChannelCombineKernel::combine_4C;
break;
case Format::UYVY422:
- _x_subsampling[0] = 2;
+ _x_subsampling[1] = 2;
+ _x_subsampling[2] = 2;
_num_elems_processed_per_iteration = 16;
_func = &NEChannelCombineKernel::combine_YUV_1p<true>;
break;
case Format::YUYV422:
- _x_subsampling[0] = 2;
+ _x_subsampling[1] = 2;
+ _x_subsampling[2] = 2;
_num_elems_processed_per_iteration = 16;
_func = &NEChannelCombineKernel::combine_YUV_1p<false>;
break;
break;
}
- // Configure kernel window
- Window win = calculate_max_window(*plane0->info(), Steps(_num_elems_processed_per_iteration));
- AccessWindowAutoPadding output_access(output->info());
+ Window win = calculate_max_window(*plane0->info(), Steps(_num_elems_processed_per_iteration));
- update_window_and_padding(win,
- AccessWindowAutoPadding(plane0->info()),
- AccessWindowAutoPadding(plane1->info()),
- AccessWindowAutoPadding(plane2->info()),
- AccessWindowAutoPadding(plane3 == nullptr ? nullptr : plane3->info()),
- output_access);
+ AccessWindowHorizontal output_access(output->info(), 0, _num_elems_processed_per_iteration);
+ AccessWindowHorizontal plane0_access(plane0->info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[0]);
+ AccessWindowHorizontal plane1_access(plane1->info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[1]);
+ AccessWindowHorizontal plane2_access(plane2->info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[2]);
+ AccessWindowHorizontal plane3_access(plane3 == nullptr ? nullptr : plane3->info(), 0, _num_elems_processed_per_iteration);
+
+ update_window_and_padding(
+ win,
+ plane0_access,
+ plane1_access,
+ plane2_access,
+ plane3_access,
+ output_access);
- output_access.set_valid_region();
+ ValidRegion valid_region = intersect_valid_regions(plane0->info()->valid_region(),
+ plane1->info()->valid_region(),
+ plane2->info()->valid_region());
+
+ if(plane3 != nullptr)
+ {
+ valid_region = intersect_valid_regions(plane3->info()->valid_region(), valid_region);
+ }
+
+ output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape()));
INEKernel::configure(win);
}
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::NV12, Format::NV21, Format::IYUV, Format::YUV444);
- _planes[0] = plane0;
- _planes[1] = plane1;
- _planes[2] = plane2;
- _planes[3] = nullptr;
- _output = nullptr;
- _output_multi = output;
+ _planes[0] = plane0;
+ _planes[1] = plane1;
+ _planes[2] = plane2;
+ _planes[3] = nullptr;
+ _output = nullptr;
+ _output_multi = output;
+ bool has_two_planes = false;
+ unsigned int num_elems_written_plane1 = 8;
_num_elems_processed_per_iteration = 8;
_is_parallelizable = true;
- const Format fmt = output->info()->format();
+ const Format &output_format = output->info()->format();
- switch(fmt)
+ switch(output_format)
{
case Format::NV12:
case Format::NV21:
- _x_subsampling = { { 1, 2, 2 } };
- _y_subsampling = { { 1, 2, 2 } };
- _func = &NEChannelCombineKernel::combine_YUV_2p;
+ _x_subsampling = { { 1, 2, 2 } };
+ _y_subsampling = { { 1, 2, 2 } };
+ _func = &NEChannelCombineKernel::combine_YUV_2p;
+ has_two_planes = true;
+ num_elems_written_plane1 = 16;
break;
case Format::IYUV:
_is_parallelizable = false;
const unsigned int y_step = *std::max_element(_y_subsampling.begin(), _y_subsampling.end());
- // Configure kernel window
- unsigned int output_plane_count = 3;
+ Window win = calculate_max_window(*plane0->info(), Steps(_num_elems_processed_per_iteration, y_step));
+ AccessWindowRectangle output_plane0_access(output->plane(0)->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f, 1.f / _y_subsampling[0]);
+ AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_written_plane1, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]);
+ AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]);
- if(output->info()->format() == Format::NV12 || output->info()->format() == Format::NV21)
- {
- output_plane_count = 2;
- }
+ update_window_and_padding(win,
+ AccessWindowHorizontal(plane0->info(), 0, _num_elems_processed_per_iteration),
+ AccessWindowRectangle(plane1->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]),
+ AccessWindowRectangle(plane2->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]),
+ output_plane0_access,
+ output_plane1_access,
+ output_plane2_access);
- Window win = calculate_max_window(*plane0->info(), Steps(_num_elems_processed_per_iteration, y_step));
- AccessWindowAutoPadding output_access0(output->plane(0)->info());
- AccessWindowAutoPadding output_access1(output->plane(1)->info());
- AccessWindowAutoPadding output_access2(output_plane_count == 2 ? nullptr : output->plane(2)->info());
+ ValidRegion plane0_valid_region = plane0->info()->valid_region();
- update_window_and_padding(win,
- AccessWindowAutoPadding(plane0->info()),
- AccessWindowAutoPadding(plane1->info()),
- AccessWindowAutoPadding(plane2->info()),
- output_access0,
- output_access1,
- output_access2);
+ ValidRegion output_plane1_region = has_two_planes ? intersect_valid_regions(plane1->info()->valid_region(), plane2->info()->valid_region()) : plane2->info()->valid_region();
- output_access0.set_valid_region();
- output_access1.set_valid_region();
- output_access2.set_valid_region();
+ output_plane0_access.set_valid_region(win, ValidRegion(plane0_valid_region.anchor, output->plane(0)->info()->tensor_shape()));
+ output_plane1_access.set_valid_region(win, ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape()));
+ output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape()));
INEKernel::configure(win);
}
{
// Create sub-sampled uv window and init uv planes
Window win_uv(win);
- win_uv.set_dimension_step(0, win.x().step() / _x_subsampling[0]);
+ win_uv.set_dimension_step(0, win.x().step() / _x_subsampling[1]);
win_uv.validate();
Iterator p0(_planes[0], win);