arm_compute v17.04
[platform/upstream/armcl.git] / src / core / CL / kernels / CLLKTrackerKernel.cpp
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
25
26 #include "arm_compute/core/AccessWindowStatic.h"
27 #include "arm_compute/core/CL/CLKernelLibrary.h"
28 #include "arm_compute/core/CL/ICLArray.h"
29 #include "arm_compute/core/CL/ICLTensor.h"
30 #include "arm_compute/core/Coordinates.h"
31 #include "arm_compute/core/Error.h"
32 #include "arm_compute/core/Helpers.h"
33 #include "arm_compute/core/TensorInfo.h"
34 #include "arm_compute/core/Validate.h"
35 #include "arm_compute/core/Window.h"
36
37 #include <cmath>
38
39 using namespace arm_compute;
40
41 void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
42                                       ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
43                                       bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
44
45 {
46     ARM_COMPUTE_ERROR_ON(old_points == nullptr);
47     ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
48     ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
49
50     const float scale = std::pow(pyramid_scale, level);
51
52     // Create kernel
53     std::string kernel_name = "init_level";
54     if(level == (num_levels - 1))
55     {
56         kernel_name += (use_initial_estimate) ? std::string("_max_initial_estimate") : std::string("_max");
57     }
58     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
59
60     // Set static kernel arguments
61     unsigned int idx = 0;
62     if(level == (num_levels - 1))
63     {
64         _kernel.setArg(idx++, old_points->cl_buffer());
65         if(use_initial_estimate)
66         {
67             _kernel.setArg(idx++, new_points_estimates->cl_buffer());
68         }
69     }
70     _kernel.setArg(idx++, old_points_internal->cl_buffer());
71     _kernel.setArg(idx++, new_points_internal->cl_buffer());
72     _kernel.setArg<cl_float>(idx++, scale);
73
74     // Configure kernel window
75     Window window;
76     window.set(Window::DimX, Window::Dimension(0, old_points->num_values(), 1));
77     window.set(Window::DimY, Window::Dimension(0, 1, 1));
78     ICLKernel::configure(window);
79 }
80
81 void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue)
82 {
83     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
84     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
85
86     enqueue(queue, *this, window);
87 }
88
89 void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
90
91 {
92     ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
93     ARM_COMPUTE_ERROR_ON(new_points == nullptr);
94
95     // Create kernel
96     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("finalize"));
97
98     // Set static kernel arguments
99     unsigned int idx = 0;
100     _kernel.setArg(idx++, new_points_internal->cl_buffer());
101     _kernel.setArg(idx++, new_points->cl_buffer());
102
103     // Configure kernel window
104     Window window;
105     window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
106     window.set(Window::DimY, Window::Dimension(0, 1, 1));
107     ICLKernel::configure(window);
108 }
109
110 void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queue)
111 {
112     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
113     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
114
115     enqueue(queue, *this, window);
116 }
117
118 CLLKTrackerStage0Kernel::CLLKTrackerStage0Kernel()
119     : _old_input(nullptr), _old_scharr_gx(nullptr), _old_scharr_gy(nullptr)
120 {
121 }
122
123 void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
124                                         ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
125                                         ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
126                                         size_t window_dimension, size_t level)
127
128 {
129     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_input, 1, DataType::U8);
130     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gx, 1, DataType::S16);
131     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gy, 1, DataType::S16);
132     ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
133     ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
134     ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
135     ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
136
137     _old_input     = old_input;
138     _old_scharr_gx = old_scharr_gx;
139     _old_scharr_gy = old_scharr_gy;
140
141     // Configure kernel window
142     Window window;
143     window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
144     window.set(Window::DimY, Window::Dimension(0, 1, 1));
145
146     const ValidRegion valid_region = intersect_valid_regions(
147                                          old_input->info()->valid_region(),
148                                          old_scharr_gx->info()->valid_region(),
149                                          old_scharr_gy->info()->valid_region());
150
151     update_window_and_padding(window,
152                               AccessWindowStatic(old_input->info(), valid_region.anchor[0], valid_region.anchor[1],
153                                                  valid_region.shape[0], valid_region.shape[1]),
154                               AccessWindowStatic(old_scharr_gx->info(), valid_region.anchor[0], valid_region.anchor[1],
155                                                  valid_region.shape[0], valid_region.shape[1]),
156                               AccessWindowStatic(old_scharr_gy->info(), valid_region.anchor[0], valid_region.anchor[1],
157                                                  valid_region.shape[0], valid_region.shape[1]));
158
159     ICLKernel::configure(window);
160
161     // Initialize required variables
162     const int       level0              = (level == 0) ? 1 : 0;
163     const int       window_size         = window_dimension;
164     const int       window_size_squared = window_dimension * window_dimension;
165     const int       window_size_half    = window_dimension / 2;
166     const float     eig_const           = 1.0f / (2.0f * window_size_squared);
167     const cl_float3 border_limits =
168     {
169         {
170             // -1 because we load 2 values at once for bilinear interpolation
171             static_cast<float>(valid_region.anchor[0] + static_cast<int>(valid_region.shape[0]) - window_size - 1),
172             static_cast<float>(valid_region.anchor[1] + static_cast<int>(valid_region.shape[1]) - window_size - 1),
173             static_cast<float>(valid_region.anchor[0])
174         }
175     };
176
177     // Create kernel
178     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("lktracker_stage0"));
179
180     // Set arguments
181     unsigned int idx = 3 * num_arguments_per_2D_tensor();
182     _kernel.setArg(idx++, old_points_internal->cl_buffer());
183     _kernel.setArg(idx++, new_points_internal->cl_buffer());
184     _kernel.setArg(idx++, coeff_table->cl_buffer());
185     _kernel.setArg(idx++, old_ival->cl_buffer());
186     _kernel.setArg<cl_int>(idx++, window_size);
187     _kernel.setArg<cl_int>(idx++, window_size_squared);
188     _kernel.setArg<cl_int>(idx++, window_size_half);
189     _kernel.setArg<cl_float3>(idx++, border_limits);
190     _kernel.setArg<cl_float>(idx++, eig_const);
191     _kernel.setArg<cl_int>(idx++, level0);
192 }
193
194 void CLLKTrackerStage0Kernel::run(const Window &window, cl::CommandQueue &queue)
195 {
196     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
197     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
198
199     // Set static tensor arguments. Setting here as allocation might be deferred.
200     unsigned int idx = 0;
201     add_2D_tensor_argument(idx, _old_input, window);
202     add_2D_tensor_argument(idx, _old_scharr_gx, window);
203     add_2D_tensor_argument(idx, _old_scharr_gy, window);
204
205     enqueue(queue, *this, window);
206 }
207
208 CLLKTrackerStage1Kernel::CLLKTrackerStage1Kernel()
209     : _new_input(nullptr)
210 {
211 }
212
213 void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
214                                         Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
215
216 {
217     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(new_input, 1, DataType::U8);
218     ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
219     ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
220     ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
221
222     _new_input = new_input;
223
224     // Configure kernel window
225     Window window;
226     window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
227     window.set(Window::DimY, Window::Dimension(0, 1, 1));
228
229     const ValidRegion &valid_region = new_input->info()->valid_region();
230
231     update_window_and_padding(window,
232                               AccessWindowStatic(new_input->info(), valid_region.anchor[0], valid_region.anchor[1],
233                                                  valid_region.shape[0], valid_region.shape[1]));
234
235     ICLKernel::configure(window);
236
237     // Initialize required variables
238     const int       level0              = (level == 0) ? 1 : 0;
239     const int       window_size         = window_dimension;
240     const int       window_size_squared = window_dimension * window_dimension;
241     const int       window_size_half    = window_dimension / 2;
242     const float     eig_const           = 1.0f / (2.0f * window_size_squared);
243     const cl_float3 border_limits =
244     {
245         {
246             // -1 because we load 2 values at once for bilinear interpolation
247             static_cast<float>(valid_region.anchor[0] + static_cast<int>(valid_region.shape[0]) - window_size - 1),
248             static_cast<float>(valid_region.anchor[1] + static_cast<int>(valid_region.shape[1]) - window_size - 1),
249             static_cast<float>(valid_region.anchor[0])
250         }
251     };
252     const int term_iteration = (termination == Termination::TERM_CRITERIA_ITERATIONS || termination == Termination::TERM_CRITERIA_BOTH) ? 1 : 0;
253     const int term_epsilon   = (termination == Termination::TERM_CRITERIA_EPSILON || termination == Termination::TERM_CRITERIA_BOTH) ? 1 : 0;
254
255     // Create kernel
256     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("lktracker_stage1"));
257
258     // Set static kernel arguments
259     unsigned int idx = num_arguments_per_2D_tensor();
260     _kernel.setArg(idx++, new_points_internal->cl_buffer());
261     _kernel.setArg(idx++, coeff_table->cl_buffer());
262     _kernel.setArg(idx++, old_ival->cl_buffer());
263     _kernel.setArg<cl_int>(idx++, window_size);
264     _kernel.setArg<cl_int>(idx++, window_size_squared);
265     _kernel.setArg<cl_int>(idx++, window_size_half);
266     _kernel.setArg<cl_int>(idx++, num_iterations);
267     _kernel.setArg<cl_float>(idx++, epsilon);
268     _kernel.setArg<cl_float3>(idx++, border_limits);
269     _kernel.setArg<cl_float>(idx++, eig_const);
270     _kernel.setArg<cl_int>(idx++, level0);
271     _kernel.setArg<cl_int>(idx++, term_iteration);
272     _kernel.setArg<cl_int>(idx++, term_epsilon);
273 }
274
275 void CLLKTrackerStage1Kernel::run(const Window &window, cl::CommandQueue &queue)
276 {
277     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
278     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
279
280     // Set static tensor arguments. Setting here as allocation might be deferred.
281     unsigned int idx = 0;
282     add_2D_tensor_argument(idx, _new_input, window);
283
284     enqueue(queue, *this, window);
285 }