2 * Copyright (c) 2016-2018 ARM Limited.
4 * SPDX-License-Identifier: MIT
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #ifndef ARM_COMPUTE_HELPER_H
25 #define ARM_COMPUTE_HELPER_H
27 #if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
28 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
29 #endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
31 #if defined(ARM_COMPUTE_DEBUG_ENABLED)
32 #pragma OPENCL EXTENSION cl_arm_printf : enable
33 #endif // defined(ARM_COMPUTE_DEBUG_ENABLED)
37 #define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
39 #define VLOAD_STR(size) vload##size
40 #define VLOAD(size) VLOAD_STR(size)
42 #define VSTORE_STR(size) vstore##size
43 #define VSTORE(size) VSTORE_STR(size)
45 #define VEC_DATA_TYPE_STR(type, size) type##size
46 #define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
48 #define CL_VEC_DATA_TYPE_STR(type, size) type##size
49 #define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size)
51 #define CONVERT_STR(x, type) (convert_##type((x)))
52 #define CONVERT(x, type) CONVERT_STR(x, type)
54 #define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
55 #define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
57 #define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
58 #define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
60 #define VECTOR_DECLARATION(name) \
61 __global uchar *name##_ptr, \
62 uint name##_stride_x, \
64 uint name##_offset_first_element_in_bytes
66 #define IMAGE_DECLARATION(name) \
67 __global uchar *name##_ptr, \
68 uint name##_stride_x, \
70 uint name##_stride_y, \
72 uint name##_offset_first_element_in_bytes
74 #define TENSOR3D_DECLARATION(name) \
75 __global uchar *name##_ptr, \
76 uint name##_stride_x, \
78 uint name##_stride_y, \
80 uint name##_stride_z, \
82 uint name##_offset_first_element_in_bytes
84 #define TENSOR4D_DECLARATION(name) \
85 __global uchar *name##_ptr, \
86 uint name##_stride_x, \
88 uint name##_stride_y, \
90 uint name##_stride_z, \
92 uint name##_stride_w, \
94 uint name##_offset_first_element_in_bytes
96 #define CONVERT_TO_VECTOR_STRUCT(name) \
97 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
99 #define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
100 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
102 #define CONVERT_TO_IMAGE_STRUCT(name) \
103 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
105 #define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
106 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)
108 #define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
109 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
111 #define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
112 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)
114 #define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
115 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
117 #define CONVERT_TO_TENSOR3D_STRUCT(name) \
118 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
119 name##_stride_z, name##_step_z)
121 #define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
122 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)
124 #define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
125 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
126 name##_stride_z, name##_step_z, name##_stride_w, name##_step_w, mod_size)
128 #define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
129 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)
131 /** Structure to hold Vector information */
132 typedef struct Vector
134 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
135 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
136 int stride_x; /**< Stride of the image in X dimension (in bytes) */
139 /** Structure to hold Image information */
142 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
143 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
144 int stride_x; /**< Stride of the image in X dimension (in bytes) */
145 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
148 /** Structure to hold 3D tensor information */
149 typedef struct Tensor3D
151 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
152 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
153 int stride_x; /**< Stride of the image in X dimension (in bytes) */
154 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
155 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
158 /** Structure to hold 4D tensor information */
159 typedef struct Tensor4D
161 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
162 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
163 int stride_x; /**< Stride of the image in X dimension (in bytes) */
164 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
165 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
166 int stride_w; /**< Stride of the image in W dimension (in bytes) */
169 /** Wrap vector information into an Vector structure, and make the pointer point at this workitem's data.
171 * @param[in] ptr Pointer to the starting postion of the buffer
172 * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
173 * @param[in] stride_x Stride of the vector in X dimension (in bytes)
174 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
176 * @return An image object
178 Vector inline update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
183 .offset_first_element_in_bytes = offset_first_element_in_bytes,
184 .stride_x = stride_x,
186 vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
190 /** Wrap image information into an Image structure, and make the pointer point at this workitem's data.
192 * @param[in] ptr Pointer to the starting postion of the buffer
193 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
194 * @param[in] stride_x Stride of the image in X dimension (in bytes)
195 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
196 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
197 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
199 * @return An image object
201 Image inline update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
206 .offset_first_element_in_bytes = offset_first_element_in_bytes,
207 .stride_x = stride_x,
210 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
214 /** Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.
216 * @param[in] ptr Pointer to the starting postion of the buffer
217 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
218 * @param[in] stride_x Stride of the image in X dimension (in bytes)
219 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
220 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
221 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
222 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
223 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
225 * @return A 3D tensor object
227 Image inline update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
232 .offset_first_element_in_bytes = offset_first_element_in_bytes,
233 .stride_x = stride_x,
236 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
240 /** Wrap 3D tensor information into an tensor structure, and make the pointer point at this workitem's data.
242 * @param[in] ptr Pointer to the starting postion of the buffer
243 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
244 * @param[in] stride_x Stride of the image in X dimension (in bytes)
245 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
246 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
247 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
248 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
249 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
251 * @return A 3D tensor object
253 Tensor3D inline update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
258 .offset_first_element_in_bytes = offset_first_element_in_bytes,
259 .stride_x = stride_x,
260 .stride_y = stride_y,
263 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
267 Tensor4D inline update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w,
274 .offset_first_element_in_bytes = offset_first_element_in_bytes,
275 .stride_x = stride_x,
276 .stride_y = stride_y,
277 .stride_z = stride_z,
281 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + (get_global_id(2) / mod_size) * step_w;
285 /** Get the pointer position of a Vector
287 * @param[in] vec Pointer to the starting position of the buffer
288 * @param[in] x Relative X position
290 __global inline const uchar *vector_offset(const Vector *vec, int x)
292 return vec->ptr + x * vec->stride_x;
295 /** Get the pointer position of a Image
297 * @param[in] img Pointer to the starting position of the buffer
298 * @param[in] x Relative X position
299 * @param[in] y Relative Y position
301 __global inline uchar *offset(const Image *img, int x, int y)
303 return img->ptr + x * img->stride_x + y * img->stride_y;
306 /** Get the pointer position of a Tensor3D
308 * @param[in] tensor Pointer to the starting position of the buffer
309 * @param[in] x Relative X position
310 * @param[in] y Relative Y position
311 * @param[in] z Relative Z position
313 __global inline const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
315 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
318 /** Get the pointer position of a Tensor4D
320 * @param[in] tensor Pointer to the starting position of the buffer
321 * @param[in] x Relative X position
322 * @param[in] y Relative Y position
323 * @param[in] z Relative Z position
324 * @param[in] w Relative W position
326 __global inline const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
328 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + w * tensor->stride_w;