3ebe96b42cac149e1ba77ffc816d4321b0bc31e7
[platform/upstream/armcl.git] / src / core / NEON / kernels / NEChannelExtractKernel.cpp
1 /*
2  * Copyright (c) 2016, 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h"
25
26 #include "arm_compute/core/AccessWindowAutoPadding.h"
27 #include "arm_compute/core/Error.h"
28 #include "arm_compute/core/Helpers.h"
29 #include "arm_compute/core/IMultiImage.h"
30 #include "arm_compute/core/ITensor.h"
31 #include "arm_compute/core/MultiImageInfo.h"
32 #include "arm_compute/core/NEON/INEKernel.h"
33 #include "arm_compute/core/TensorInfo.h"
34 #include "arm_compute/core/Types.h"
35 #include "arm_compute/core/Validate.h"
36 #include "arm_compute/core/Window.h"
37
38 #include <arm_neon.h>
39
40 using namespace arm_compute;
41
42 namespace arm_compute
43 {
44 class Coordinates;
45 } // namespace arm_compute
46
47 NEChannelExtractKernel::NEChannelExtractKernel()
48     : _func(nullptr), _lut_index(0)
49 {
50 }
51
52 void NEChannelExtractKernel::configure(const ITensor *input, Channel channel, ITensor *output)
53 {
54     ARM_COMPUTE_ERROR_ON(nullptr == input);
55     ARM_COMPUTE_ERROR_ON(nullptr == output);
56     ARM_COMPUTE_ERROR_ON(input == output);
57     ARM_COMPUTE_ERROR_ON(Format::U8 != output->info()->format());
58
59     unsigned int num_elems_processed_per_iteration = 8;
60
61     // Check format and channel
62     const Format       format = input->info()->format();
63     const unsigned int subsampling(((Format::YUYV422 == format || Format::UYVY422 == format) && Channel::Y != channel) ? 2 : 1);
64
65     switch(format)
66     {
67         case Format::RGB888:
68         case Format::RGBA8888:
69             num_elems_processed_per_iteration = 16;
70             _func                             = (Format::RGB888 == format) ? &NEChannelExtractKernel::extract_1C_from_3C_img : &NEChannelExtractKernel::extract_1C_from_4C_img;
71             switch(channel)
72             {
73                 case Channel::R:
74                     _lut_index = 0;
75                     break;
76                 case Channel::G:
77                     _lut_index = 1;
78                     break;
79                 case Channel::B:
80                     _lut_index = 2;
81                     break;
82                 case Channel::A:
83                     if(Format::RGBA8888 == format)
84                     {
85                         _lut_index = 3;
86                         _func      = &NEChannelExtractKernel::extract_1C_from_4C_img;
87                         break;
88                     }
89                 default:
90                     ARM_COMPUTE_ERROR("Not supported channel for this format.");
91                     break;
92             }
93             break;
94         case Format::YUYV422:
95         case Format::UYVY422:
96             switch(channel)
97             {
98                 case Channel::Y:
99                     num_elems_processed_per_iteration = 16;
100                     _func                             = &NEChannelExtractKernel::extract_1C_from_2C_img;
101                     _lut_index                        = (Format::YUYV422 == format) ? 0 : 1;
102                     break;
103                 case Channel::U:
104                     num_elems_processed_per_iteration = 32;
105                     _func                             = &NEChannelExtractKernel::extract_YUYV_uv;
106                     _lut_index                        = (Format::YUYV422 == format) ? 1 : 0;
107                     break;
108                 case Channel::V:
109                     num_elems_processed_per_iteration = 32;
110                     _func                             = &NEChannelExtractKernel::extract_YUYV_uv;
111                     _lut_index                        = (Format::YUYV422 == format) ? 3 : 2;
112                     break;
113                 default:
114                     ARM_COMPUTE_ERROR("Not supported channel for this format.");
115                     break;
116             }
117             break;
118         default:
119             ARM_COMPUTE_ERROR("Not supported format.");
120             break;
121     }
122     _input  = input;
123     _output = output;
124
125     Window                win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
126     AccessWindowRectangle output_access(input->info(), 0, 0, num_elems_processed_per_iteration, 1, 1.f / subsampling, 1.f / subsampling);
127
128     update_window_and_padding(win,
129                               AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
130                               output_access);
131
132     ValidRegion input_valid_region = input->info()->valid_region();
133
134     output_access.set_valid_region(win, ValidRegion(std::move(input_valid_region.anchor), output->info()->tensor_shape()));
135
136     INEKernel::configure(win);
137 }
138
139 void NEChannelExtractKernel::configure(const IMultiImage *input, Channel channel, IImage *output)
140 {
141     ARM_COMPUTE_ERROR_ON(nullptr == input);
142     ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
143     ARM_COMPUTE_ERROR_ON(static_cast<const void *>(input) == static_cast<void *>(output));
144     ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8);
145
146     unsigned int num_elems_processed_per_iteration = 32;
147
148     const Format &format = input->info()->format();
149
150     switch(format)
151     {
152         case Format::NV12:
153         case Format::NV21:
154             switch(channel)
155             {
156                 case Channel::Y:
157                     _input = input->plane(0);
158                     _func  = &NEChannelExtractKernel::copy_plane;
159                     break;
160                 case Channel::U:
161                     _input                            = input->plane(1);
162                     num_elems_processed_per_iteration = 16;
163                     _func                             = &NEChannelExtractKernel::extract_1C_from_2C_img;
164                     _lut_index                        = (Format::NV12 == format) ? 0 : 1;
165                     break;
166                 case Channel::V:
167                     _input                            = input->plane(1);
168                     num_elems_processed_per_iteration = 16;
169                     _func                             = &NEChannelExtractKernel::extract_1C_from_2C_img;
170                     _lut_index                        = (Format::NV12 == format) ? 1 : 0;
171                     break;
172                 default:
173                     ARM_COMPUTE_ERROR("Not supported channel for this format.");
174                     break;
175             }
176             break;
177         case Format::IYUV:
178         case Format::YUV444:
179             _func = &NEChannelExtractKernel::copy_plane;
180             switch(channel)
181             {
182                 case Channel::Y:
183                     _input = input->plane(0);
184                     break;
185                 case Channel::U:
186                     _input = input->plane(1);
187                     break;
188                 case Channel::V:
189                     _input = input->plane(2);
190                     break;
191                 default:
192                     ARM_COMPUTE_ERROR("Not supported channel for this format.");
193                     break;
194             }
195             break;
196         default:
197             ARM_COMPUTE_ERROR("Not supported format.");
198             break;
199     }
200
201     _output                    = output;
202     Window                 win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration));
203     AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
204     AccessWindowHorizontal input_access(_input->info(), 0, num_elems_processed_per_iteration);
205     update_window_and_padding(win, input_access, output_access);
206     output_access.set_valid_region(win, _input->info()->valid_region());
207
208     INEKernel::configure(win);
209 }
210
211 void NEChannelExtractKernel::run(const Window &window)
212 {
213     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
214     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window);
215     ARM_COMPUTE_ERROR_ON(_func == nullptr);
216
217     (this->*_func)(window);
218 }
219
220 void NEChannelExtractKernel::extract_1C_from_2C_img(const Window &win)
221 {
222     Iterator in(_input, win);
223     Iterator out(_output, win);
224
225     execute_window_loop(win, [&](const Coordinates & id)
226     {
227         const auto in_ptr  = static_cast<uint8_t *>(in.ptr());
228         const auto out_ptr = static_cast<uint8_t *>(out.ptr());
229         const auto pixels  = vld2q_u8(in_ptr);
230         vst1q_u8(out_ptr, pixels.val[_lut_index]);
231     },
232     in, out);
233 }
234
235 void NEChannelExtractKernel::extract_1C_from_3C_img(const Window &win)
236 {
237     Iterator in(_input, win);
238     Iterator out(_output, win);
239
240     execute_window_loop(win, [&](const Coordinates & id)
241     {
242         const auto in_ptr  = static_cast<uint8_t *>(in.ptr());
243         const auto out_ptr = static_cast<uint8_t *>(out.ptr());
244         const auto pixels  = vld3q_u8(in_ptr);
245         vst1q_u8(out_ptr, pixels.val[_lut_index]);
246     },
247     in, out);
248 }
249
250 void NEChannelExtractKernel::extract_1C_from_4C_img(const Window &win)
251 {
252     Iterator in(_input, win);
253     Iterator out(_output, win);
254
255     execute_window_loop(win, [&](const Coordinates & id)
256     {
257         const auto in_ptr  = static_cast<uint8_t *>(in.ptr());
258         const auto out_ptr = static_cast<uint8_t *>(out.ptr());
259         const auto pixels  = vld4q_u8(in_ptr);
260         vst1q_u8(out_ptr, pixels.val[_lut_index]);
261     },
262     in, out);
263 }
264
265 void NEChannelExtractKernel::extract_YUYV_uv(const Window &win)
266 {
267     ARM_COMPUTE_ERROR_ON(win.x().step() % 2);
268
269     Window win_out(win);
270     win_out.set_dimension_step(Window::DimX, win.x().step() / 2);
271
272     Iterator in(_input, win);
273     Iterator out(_output, win_out);
274
275     execute_window_loop(win, [&](const Coordinates & id)
276     {
277         const auto in_ptr  = static_cast<uint8_t *>(in.ptr());
278         const auto out_ptr = static_cast<uint8_t *>(out.ptr());
279         const auto pixels  = vld4q_u8(in_ptr);
280         vst1q_u8(out_ptr, pixels.val[_lut_index]);
281     },
282     in, out);
283 }
284
285 void NEChannelExtractKernel::copy_plane(const Window &win)
286 {
287     Iterator in(_input, win);
288     Iterator out(_output, win);
289
290     execute_window_loop(win, [&](const Coordinates &)
291     {
292         const auto in_ptr  = static_cast<uint8_t *>(in.ptr());
293         const auto out_ptr = static_cast<uint8_t *>(out.ptr());
294         vst4_u8(out_ptr, vld4_u8(in_ptr));
295     },
296     in, out);
297 }