X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=documentation%2F_i_c_l_kernel_8h_source.xhtml;h=7aa354ff05dc5e174a4f4c793d5edf520f6cc636;hb=HEAD;hp=d2943943b601cceb3009b7aab6f21ca8b5bdc0be;hpb=871448ee8eff790c4ccc3250008dd71170cc78b2;p=platform%2Fupstream%2Farmcl.git diff --git a/documentation/_i_c_l_kernel_8h_source.xhtml b/documentation/_i_c_l_kernel_8h_source.xhtml index d294394..7aa354f 100644 --- a/documentation/_i_c_l_kernel_8h_source.xhtml +++ b/documentation/_i_c_l_kernel_8h_source.xhtml @@ -6,7 +6,7 @@ -ARM Compute Library: arm_compute/core/CL/ICLKernel.h Source File +Compute Library: arm_compute/core/CL/ICLKernel.h Source File @@ -39,8 +39,8 @@ -
ARM Compute Library -  17.03.1 +
Compute Library +  18.05
@@ -55,6 +55,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
-Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016, 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef __ARM_COMPUTE_ICLKERNEL_H__
25 #define __ARM_COMPUTE_ICLKERNEL_H__
26 
29 
30 namespace arm_compute
31 {
32 class ICLTensor;
33 class Window;
34 
36 class ICLKernel : public IKernel
37 {
38 public:
40  ICLKernel();
45  cl::Kernel &kernel();
52  void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
59  void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
66  void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
71  unsigned int num_arguments_per_1D_tensor() const;
76  unsigned int num_arguments_per_2D_tensor() const;
81  unsigned int num_arguments_per_3D_tensor() const;
89  virtual void run(const Window &window, cl::CommandQueue &queue) = 0;
95  template <typename T>
96  void add_argument(unsigned int &idx, T value)
97  {
98  _kernel.setArg(idx++, value);
99  }
100 
101 private:
108  template <unsigned int dimension_size>
109  void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
114  template <unsigned int dimension_size>
115  unsigned int num_arguments_per_tensor() const;
116 
117 protected:
118  cl::Kernel _kernel;
119  cl::NDRange _lws_hint;
120 };
121 
135 void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = cl::Range_128_1);
136 }
137 #endif /*__ARM_COMPUTE_ICLKERNEL_H__ */
Common information for all the kernels.
Definition: IKernel.h:33
-
unsigned int num_arguments_per_3D_tensor() const
Returns the number of arguments enqueued per 3D tensor object.
-
cl::Kernel & kernel()
Returns a reference to the OpenCL kernel of this object.
-
void add_argument(unsigned int &idx, T value)
Add the passed parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
Definition: ICLKernel.h:96
-
unsigned int num_arguments_per_1D_tensor() const
Returns the number of arguments enqueued per 1D tensor object.
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2018 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef __ARM_COMPUTE_ICLKERNEL_H__
25 #define __ARM_COMPUTE_ICLKERNEL_H__
26 
32 
33 #include <string>
34 
35 namespace arm_compute
36 {
37 template <typename T>
38 class ICLArray;
39 class ICLTensor;
40 class Window;
41 
43 class ICLKernel : public IKernel
44 {
45 private:
50  template <unsigned int dimension_size>
51  constexpr static unsigned int num_arguments_per_array()
52  {
53  return num_arguments_per_tensor<dimension_size>();
54  }
59  template <unsigned int dimension_size>
60  constexpr static unsigned int num_arguments_per_tensor()
61  {
62  return 2 + 2 * dimension_size;
63  }
64 
65 public:
68  : _kernel(nullptr), _lws_hint(CLKernelLibrary::get().default_ndrange()), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0)
69  {
70  }
75  cl::Kernel &kernel()
76  {
77  return _kernel;
78  }
87  template <typename T>
88  void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
89  {
90  add_array_argument<T, 1>(idx, array, strides, num_dimensions, window);
91  }
98  void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
99  {
100  add_tensor_argument<1>(idx, tensor, window);
101  }
108  void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
109  {
110  add_tensor_argument<2>(idx, tensor, window);
111  }
118  void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
119  {
120  add_tensor_argument<3>(idx, tensor, window);
121  }
128  void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
129  {
130  add_tensor_argument<4>(idx, tensor, window);
131  }
136  constexpr static unsigned int num_arguments_per_1D_array()
137  {
138  return num_arguments_per_array<1>();
139  }
144  constexpr static unsigned int num_arguments_per_1D_tensor()
145  {
146  return num_arguments_per_tensor<1>();
147  }
152  constexpr static unsigned int num_arguments_per_2D_tensor()
153  {
154  return num_arguments_per_tensor<2>();
155  }
160  constexpr static unsigned int num_arguments_per_3D_tensor()
161  {
162  return num_arguments_per_tensor<3>();
163  }
168  constexpr static unsigned int num_arguments_per_4D_tensor()
169  {
170  return num_arguments_per_tensor<4>();
171  }
179  virtual void run(const Window &window, cl::CommandQueue &queue) = 0;
185  template <typename T>
186  void add_argument(unsigned int &idx, T value)
187  {
188  _kernel.setArg(idx++, value);
189  }
190 
197  void set_lws_hint(const cl::NDRange &lws_hint)
198  {
199  _lws_hint = lws_hint;
200  }
201 
206  cl::NDRange lws_hint() const
207  {
208  return _lws_hint;
209  }
210 
221  const std::string &config_id() const
222  {
223  return _config_id;
224  }
225 
230  void set_target(GPUTarget target)
231  {
232  _target = target;
233  }
234 
239  void set_target(cl::Device &device);
240 
246  {
247  return _target;
248  }
249 
254  size_t get_max_workgroup_size();
261  static cl::NDRange gws_from_window(const Window &window);
262 
263 private:
272  template <typename T, unsigned int dimension_size>
273  void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
280  template <unsigned int dimension_size>
281  void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
282 
283 protected:
284  cl::Kernel _kernel;
285  cl::NDRange _lws_hint;
286  GPUTarget _target;
287  std::string _config_id;
288  size_t _max_workgroup_size;
289 };
290 
304 void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange());
305 
314 template <typename T, unsigned int dimension_size>
315 void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
316 {
317  ARM_COMPUTE_ERROR_ON(array == nullptr);
318 
319  // Calculate offset to the start of the window
320  unsigned int offset_first_element = 0;
321 
322  for(unsigned int n = 0; n < num_dimensions; ++n)
323  {
324  offset_first_element += window[n].start() * strides[n];
325  }
326 
327  unsigned int idx_start = idx;
328  _kernel.setArg(idx++, array->cl_buffer());
329 
330  for(unsigned int dimension = 0; dimension < dimension_size; dimension++)
331  {
332  _kernel.setArg<cl_uint>(idx++, strides[dimension]);
333  _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step());
334  }
335 
336  _kernel.setArg<cl_uint>(idx++, offset_first_element);
337 
338  ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_array<dimension_size>() != idx,
339  "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>());
340  ARM_COMPUTE_UNUSED(idx_start);
341 }
342 }
343 #endif /*__ARM_COMPUTE_ICLKERNEL_H__ */
static constexpr unsigned int num_arguments_per_1D_tensor()
Returns the number of arguments enqueued per 1D tensor object.
Definition: ICLKernel.h:144
+
Common information for all the kernels.
Definition: IKernel.h:33
+
static constexpr unsigned int num_arguments_per_1D_array()
Returns the number of arguments enqueued per 1D array object.
Definition: ICLKernel.h:136
+
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:206
+
cl::Kernel & kernel()
Returns a reference to the OpenCL kernel of this object.
Definition: ICLKernel.h:75
+
void set_lws_hint(const cl::NDRange &lws_hint)
Set the Local-Workgroup-Size hint.
Definition: ICLKernel.h:197
+
void add_argument(unsigned int &idx, T value)
Add the passed parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
Definition: ICLKernel.h:186
+ +
void add_1D_array_argument(unsigned int &idx, const ICLArray< T > *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
Add the passed 1D array&#39;s parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
Definition: ICLKernel.h:88
+
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:328
-
Common interface for all the OpenCL kernels.
Definition: ICLKernel.h:36
-
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor&#39;s parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
- -
unsigned int num_arguments_per_2D_tensor() const
Returns the number of arguments enqueued per 2D tensor object.
-
ICLImage ICLTensor
Definition: CLHistogram.h:34
+
Common interface for all the OpenCL kernels.
Definition: ICLKernel.h:43
+
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor&#39;s parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
Definition: ICLKernel.h:118
+
This file contains all available output stages for GEMMLowp on OpenCL.
+
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
+
Interface for OpenCL Array.
Definition: ICLArray.h:35
+
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
Definition: ICLKernel.h:160
+
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:159
+
GPUTarget get_target() const
Get the targeted GPU architecture.
Definition: ICLKernel.h:245
+
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
Definition: ICLKernel.h:152
+
static constexpr unsigned int num_arguments_per_4D_tensor()
Returns the number of arguments enqueued per 4D tensor object.
Definition: ICLKernel.h:168
+
const std::string & config_id() const
Get the configuration ID.
Definition: ICLKernel.h:221
+
static cl::NDRange gws_from_window(const Window &window)
Get the global work size given an execution window.
+
Strides of an item in bytes.
Definition: Strides.h:37
+
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange())
Add the kernel to the command queue with the given window.
+
virtual void run(const Window &window, cl::CommandQueue &queue)=0
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
-
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor&#39;s parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
-
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=cl::Range_128_1)
Add the kernel to the command queue with the given window.
+
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor&#39;s parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
Definition: ICLKernel.h:108
Interface for OpenCL tensor.
Definition: ICLTensor.h:40
-
ICLKernel()
Constructor.
+
ICLKernel()
Constructor.
Definition: ICLKernel.h:67
+
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
+ + -
void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 1D tensor&#39;s parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
+
CLKernelLibrary class.
+
size_t get_max_workgroup_size()
Get the maximum workgroup size for the device the CLKernelLibrary uses.
+
void set_target(GPUTarget target)
Set the targeted GPU architecture.
Definition: ICLKernel.h:230
+
void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 1D tensor&#39;s parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
Definition: ICLKernel.h:98
+
void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 4D tensor&#39;s parameters to the object&#39;s kernel&#39;s arguments starting from the index idx...
Definition: ICLKernel.h:128
+
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
const Window & window() const
The maximum window the kernel can be executed on.
Describe a multidimensional execution window.
Definition: Window.h:39
+
#define ARM_COMPUTE_ERROR_ON_MSG(cond,...)
Definition: Error.h:319