2 * Copyright (c) 2016-2018 ARM Limited.
4 * SPDX-License-Identifier: MIT
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #ifndef __ARM_COMPUTE_HELPERS_H__
25 #define __ARM_COMPUTE_HELPERS_H__
27 #include "arm_compute/core/Coordinates.h"
28 #include "arm_compute/core/Error.h"
29 #include "arm_compute/core/IAccessWindow.h"
30 #include "arm_compute/core/Steps.h"
31 #include "arm_compute/core/Strides.h"
32 #include "arm_compute/core/TensorShape.h"
33 #include "arm_compute/core/Types.h"
34 #include "arm_compute/core/Window.h"
41 #include <type_traits>
/** Disable bitwise operations by default.
 *
 * Specialize this trait with `value = true` for an enum class to opt it in
 * to the bitwise operators defined below (e.g. operator&).
 */
template <typename T>
struct enable_bitwise_ops
{
    static constexpr bool value = false; /**< Disabled */
};
57 #ifndef DOXYGEN_SKIP_THIS
59 typename std::enable_if<enable_bitwise_ops<T>::value, T>::type operator&(T lhs, T rhs)
61 using underlying_type = typename std::underlying_type<T>::type;
62 return static_cast<T>(static_cast<underlying_type>(lhs) & static_cast<underlying_type>(rhs));
64 #endif /* DOXYGEN_SKIP_THIS */
66 /** Helper function to create and return a unique_ptr pointed to a CL/GLES kernel object
67 * It also calls the kernel's configuration.
69 * @param[in] args All the arguments that need pass to kernel's configuration.
71 * @return A unique pointer pointed to a CL/GLES kernel object
73 template <typename Kernel, typename... T>
74 std::unique_ptr<Kernel> create_configure_kernel(T &&... args)
76 std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
77 k->configure(std::forward<T>(args)...);
81 /** Helper function to create and return a unique_ptr pointed to a CL/GLES kernel object
83 * @return A unique pointer pointed to a Kernel kernel object
85 template <typename Kernel>
86 std::unique_ptr<Kernel> create_kernel()
88 std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
/** Check if a type T is contained in a tuple Tuple of types */
template <typename T, typename Tuple>
struct is_contained;

/** Base case: an empty tuple contains nothing */
template <typename T>
struct is_contained<T, std::tuple<>> : std::false_type
{
};

/** Match: T is the head of the tuple */
template <typename T, typename... Ts>
struct is_contained<T, std::tuple<T, Ts...>> : std::true_type
{
};

/** No match on the head: recurse on the tail of the tuple */
template <typename T, typename U, typename... Ts>
struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...>>
{
};
/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between
 * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
 *
 * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
 * @param[in] stride    Stride to access the bottom-left and bottom-right pixel values
 * @param[in] dx        Pixel's distance between the X real coordinate and the smallest X following integer
 * @param[in] dy        Pixel's distance between the Y real coordinate and the smallest Y following integer
 *
 * @note dx and dy must be in the range [0, 1.0]
 *
 * @return The bilinear interpolated pixel value
 */
template <typename T>
inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy)
{
    ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);

    const float dx1 = 1.0f - dx;
    const float dy1 = 1.0f - dy;

    // 2x2 neighbourhood: a00 top-left, a01 top-right, a10 bottom-left, a11 bottom-right
    const T a00 = *pixel_ptr;
    const T a01 = *(pixel_ptr + 1);
    const T a10 = *(pixel_ptr + stride);
    const T a11 = *(pixel_ptr + stride + 1);

    // Bilinear weights; w1+w2+w3+w4 == 1 for dx, dy in [0, 1]
    const float w1 = dx1 * dy1;
    const float w2 = dx * dy1;
    const float w3 = dx1 * dy;
    const float w4 = dx * dy;

    return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
}
/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between
 * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
 *
 * @param[in] pixel_ptr Pointer to the top pixel value of a single channel input.
 * @param[in] stride    Stride to access the bottom pixel value
 * @param[in] dy        Pixel's distance between the Y real coordinate and the smallest Y following integer
 *
 * @note dy must be in the range [0, 1.0]
 *
 * @return The linear interpolated pixel value
 */
template <typename T>
inline T delta_linear_c1_y(const T *pixel_ptr, size_t stride, float dy)
{
    ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);

    const float dy1 = 1.0f - dy;

    const T a00 = *pixel_ptr;
    const T a10 = *(pixel_ptr + stride);

    // Linear weights along Y; w1 + w3 == 1 for dy in [0, 1]
    const float w1 = dy1;
    const float w3 = dy;

    return static_cast<T>(a00 * w1 + a10 * w3);
}
/** Computes linear interpolation using the pointer to the left pixel and the pixel's distance between
 * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
 *
 * @param[in] pixel_ptr Pointer to the left pixel value of a single channel input.
 * @param[in] dx        Pixel's distance between the X real coordinate and the smallest X following integer
 *
 * @note dx must be in the range [0, 1.0]
 *
 * @return The linear interpolated pixel value
 */
template <typename T>
inline T delta_linear_c1_x(const T *pixel_ptr, float dx)
{
    ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);

    const T a00 = *pixel_ptr;
    const T a01 = *(pixel_ptr + 1);

    const float dx1 = 1.0f - dx;

    // Linear weights along X; w1 + w2 == 1 for dx in [0, 1]
    const float w1 = dx1;
    const float w2 = dx;

    return static_cast<T>(a00 * w1 + a01 * w2);
}
/** Return the pixel at (x,y) using bilinear interpolation.
 *
 * @warning Only works if the iterator was created with an IImage
 *
 * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input.
 * @param[in] stride          Stride in bytes of the image.
 * @param[in] x               X position of the wanted pixel
 * @param[in] y               Y position of the wanted pixel
 *
 * @return The pixel at (x, y) using bilinear interpolation.
 */
template <typename T>
inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y)
{
    ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);

    // Integer part selects the top-left sample of the 2x2 neighbourhood
    const int32_t xi = std::floor(x);
    const int32_t yi = std::floor(y);

    // Fractional part drives the interpolation weights
    const float dx = x - xi;
    const float dy = y - yi;

    return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy);
}
/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input
 *
 * @warning Only works if the iterator was created with an IImage
 *
 * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image.
 * @param[in] stride          Stride in bytes of the image
 * @param[in] width           Width of the image
 * @param[in] height          Height of the image
 * @param[in] x               X position of the wanted pixel
 * @param[in] y               Y position of the wanted pixel
 *
 * @return The pixel at (x, y) using bilinear interpolation.
 */
template <typename T>
inline uint8_t pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y)
{
    // NOTE(review): return type is uint8_t while values are computed as T -- kept for
    // interface compatibility; confirm against callers whether this should return T.
    ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);

    // Clamp the sample position so the 2x2 neighbourhood stays addressable
    x = std::max(-1.f, std::min(x, static_cast<float>(width)));
    y = std::max(-1.f, std::min(y, static_cast<float>(height)));

    const float xi = std::floor(x);
    const float yi = std::floor(y);

    const float dx = x - xi;
    const float dy = y - yi;

    if(dx == 0.0f)
    {
        if(dy == 0.0f)
        {
            // Exactly on a sample: no interpolation needed
            return static_cast<T>(first_pixel_ptr[static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride]);
        }
        // On a column: interpolate along Y only
        return delta_linear_c1_y(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dy);
    }
    if(dy == 0.0f)
    {
        // On a row: interpolate along X only
        return delta_linear_c1_x(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, dx);
    }
    return delta_bilinear_c1(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dx, dy);
}
265 /** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8
267 * @note The interpolation area depends on the width and height ratio of the input and output images
268 * @note Currently average of the contributing pixels is calculated
270 * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image.
271 * @param[in] stride Stride in bytes of the image
272 * @param[in] width Width of the image
273 * @param[in] height Height of the image
274 * @param[in] wr Width ratio among the input image width and output image width.
275 * @param[in] hr Height ratio among the input image height and output image height.
276 * @param[in] x X position of the wanted pixel
277 * @param[in] y Y position of the wanted pixel
279 * @return The pixel at (x, y) using area interpolation.
281 inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y);
283 /** Iterator updated by @ref execute_window_loop for each window element */
287 /** Default constructor to create an empty iterator */
288 constexpr Iterator();
289 /** Create a container iterator for the metadata and allocation contained in the ITensor
291 * @param[in] tensor The tensor to associate to the iterator.
292 * @param[in] window The window which will be used to iterate over the tensor.
294 Iterator(const ITensor *tensor, const Window &window);
296 /** Increment the iterator along the specified dimension of the step value associated to the dimension.
298 * @warning It is the caller's responsibility to call increment(dimension+1) when reaching the end of a dimension, the iterator will not check for overflow.
300 * @note When incrementing a dimension 'n' the coordinates of all the dimensions in the range (0,n-1) are reset. For example if you iterate over a 2D image, every time you change row (dimension 1), the iterator for the width (dimension 0) is reset to its start.
302 * @param[in] dimension Dimension to increment
304 void increment(size_t dimension);
306 /** Return the offset in bytes from the first element to the current position of the iterator
308 * @return The current position of the iterator in bytes relative to the first element.
310 constexpr int offset() const;
312 /** Return a pointer to the current pixel.
314 * @warning Only works if the iterator was created with an ITensor.
316 * @return equivalent to buffer() + offset()
318 constexpr uint8_t *ptr() const;
320 /** Move the iterator back to the beginning of the specified dimension.
322 * @param[in] dimension Dimension to reset
324 void reset(size_t dimension);
332 constexpr Dimension()
333 : _dim_start(0), _stride(0)
341 std::array<Dimension, Coordinates::num_max_dimensions> _dims;
344 /** Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
345 * It passes the x and y positions to the lambda_function for each iteration
347 * @param[in] w Window to iterate through.
348 * @param[in] lambda_function The function of type void(function)( const Coordinates & id ) to call at each iteration.
349 * Where id represents the absolute coordinates of the item to process.
350 * @param[in,out] iterators Tensor iterators which will be updated by this function before calling lambda_function.
352 template <typename L, typename... Ts>
353 inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators);
355 /** Update window and padding size for each of the access patterns.
357 * First the window size is reduced based on all access patterns that are not
358 * allowed to modify the padding of the underlying tensor. Then the padding of
359 * the remaining tensors is increased to match the window.
361 * @param[in] win Window that is used by the kernel.
362 * @param[in] patterns Access patterns used to calculate the final window and padding.
364 * @return True if the window has been changed. Changes to the padding do not
365 * influence the returned value.
367 template <typename... Ts>
368 bool update_window_and_padding(Window &win, Ts &&... patterns)
370 bool window_changed = false;
372 utility::for_each([&](const IAccessWindow & w)
374 window_changed |= w.update_window_if_needed(win);
378 bool padding_changed = false;
380 utility::for_each([&](IAccessWindow & w)
382 padding_changed |= w.update_padding_if_needed(win);
386 return window_changed;
389 /** Calculate the maximum window for a given tensor shape and border setting
391 * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
392 * @param[in] steps (Optional) Number of elements processed for each step.
393 * @param[in] skip_border (Optional) If true exclude the border region from the window.
394 * @param[in] border_size (Optional) Border size.
396 * @return The maximum window the kernel can be executed on.
398 Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
400 /** Calculate the maximum window for a given tensor shape and border setting
402 * @param[in] info Tensor info object defining the shape of the object for which the window is created.
403 * @param[in] steps (Optional) Number of elements processed for each step.
404 * @param[in] skip_border (Optional) If true exclude the border region from the window.
405 * @param[in] border_size (Optional) Border size.
407 * @return The maximum window the kernel can be executed on.
409 inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
411 return calculate_max_window(info.valid_region(), steps, skip_border, border_size);
414 /** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
416 * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
417 * @param[in] steps (Optional) Number of elements processed for each step.
418 * @param[in] skip_border (Optional) If true exclude the border region from the window.
419 * @param[in] border_size (Optional) Border size. The border region will be excluded from the window.
421 * @return The maximum window the kernel can be executed on.
423 Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
425 /** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
427 * @param[in] info Tensor info object defining the shape of the object for which the window is created.
428 * @param[in] steps (Optional) Number of elements processed for each step.
429 * @param[in] skip_border (Optional) If true exclude the border region from the window.
430 * @param[in] border_size (Optional) Border size.
432 * @return The maximum window the kernel can be executed on.
434 inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
436 return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size);
439 /** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border.
441 * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created.
442 * @param[in] steps (Optional) Number of elements processed for each step.
443 * @param[in] border_size (Optional) Border size. The border region will be included in the window.
445 * @return The maximum window the kernel can be executed on.
447 Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize());
449 /** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border.
451 * @param[in] info Tensor info object defining the shape of the object for which the window is created.
452 * @param[in] steps (Optional) Number of elements processed for each step.
453 * @param[in] border_size (Optional) Border size. The border region will be included in the window.
455 * @return The maximum window the kernel can be executed on.
457 inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize())
459 return calculate_max_enlarged_window(info.valid_region(), steps, border_size);
462 /** Intersect multiple valid regions.
464 * @param[in] regions Valid regions.
466 * @return Intersection of all regions.
468 template <typename... Ts>
469 ValidRegion intersect_valid_regions(const Ts &... regions)
471 auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion
475 for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d)
477 region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d]));
480 for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d)
482 region.shape.set(d, std::min(r1.shape[d], r2.shape[d]));
488 return utility::foldl(intersect, regions...);
491 /** Create a strides object based on the provided strides and the tensor dimensions.
493 * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides.
494 * @param[in] stride_x Stride to be used in X dimension (in bytes).
495 * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes).
497 * @return Strides object based on the specified strides. Missing strides are
498 * calculated based on the tensor shape and the strides of lower dimensions.
500 template <typename T, typename... Ts>
501 inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides)
503 const TensorShape &shape = info.tensor_shape();
505 // Create strides object
506 Strides strides(stride_x, fixed_strides...);
508 for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
510 strides.set(i, shape[i - 1] * strides[i - 1]);
516 /** Create a strides object based on the tensor dimensions.
518 * @param[in] info Tensor info object used to compute the strides.
520 * @return Strides object based on element size and tensor shape.
522 template <typename... Ts>
523 inline Strides compute_strides(const ITensorInfo &info)
525 return compute_strides(info, info.element_size());
528 /** Permutes given Dimensions according to a permutation vector
530 * @warning Validity of permutation is not checked
532 * @param[in, out] dimensions Dimensions to permute
533 * @param[in] perm Permutation vector
535 template <typename T>
536 inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm)
538 auto dimensions_copy = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end());
539 for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
541 T dimension_val = (perm[i] < dimensions.num_dimensions()) ? dimensions_copy[perm[i]] : 0;
542 dimensions.set(i, dimension_val);
546 /** Permutes given TensorShape according to a permutation vector
548 * @warning Validity of permutation is not checked
550 * @param[in, out] shape Shape to permute
551 * @param[in] perm Permutation vector
553 inline void permute(TensorShape &shape, const PermutationVector &perm)
555 TensorShape shape_copy = shape;
556 for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
558 size_t dimension_val = (perm[i] < shape.num_dimensions()) ? shape_copy[perm[i]] : 1;
559 shape.set(i, dimension_val, false); // Avoid changes in _num_dimension
563 /** Auto initialize the tensor info (shape, number of channels, data type and fixed point position) if the current assignment is empty.
565 * @param[in,out] info Tensor info used to check and assign.
566 * @param[in] shape New shape.
567 * @param[in] num_channels New number of channels.
568 * @param[in] data_type New data type
569 * @param[in] fixed_point_position New fixed point position
570 * @param[in] quantization_info (Optional) New quantization info
572 * @return True if the tensor info has been initialized
574 bool auto_init_if_empty(ITensorInfo &info,
575 const TensorShape &shape,
576 int num_channels, DataType data_type,
577 int fixed_point_position,
578 QuantizationInfo quantization_info = QuantizationInfo());
580 /** Auto initialize the tensor info using another tensor info.
582 * @param info_sink Tensor info used to check and assign
583 * @param info_source Tensor info used to assign
585 * @return True if the tensor info has been initialized
587 bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source);
589 /** Set the shape to the specified value if the current assignment is empty.
591 * @param[in,out] info Tensor info used to check and assign.
592 * @param[in] shape New shape.
594 * @return True if the shape has been changed.
596 bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape);
598 /** Set the format, data type and number of channels to the specified value if
599 * the current data type is unknown.
601 * @param[in,out] info Tensor info used to check and assign.
602 * @param[in] format New format.
604 * @return True if the format has been changed.
606 bool set_format_if_unknown(ITensorInfo &info, Format format);
608 /** Set the data type and number of channels to the specified value if
609 * the current data type is unknown.
611 * @param[in,out] info Tensor info used to check and assign.
612 * @param[in] data_type New data type.
614 * @return True if the data type has been changed.
616 bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type);
618 /** Set the data layout to the specified value if
619 * the current data layout is unknown.
621 * @param[in,out] info Tensor info used to check and assign.
622 * @param[in] data_layout New data layout.
624 * @return True if the data type has been changed.
626 bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout);
628 /** Set the fixed point position to the specified value if
629 * the current fixed point position is 0 and the data type is QS8 or QS16
631 * @param[in,out] info Tensor info used to check and assign.
632 * @param[in] fixed_point_position New fixed point position
634 * @return True if the fixed point position has been changed.
636 bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position);
638 /** Set the quantization info to the specified value if
639 * the current quantization info is empty and the data type of asymmetric quantized type
641 * @param[in,out] info Tensor info used to check and assign.
642 * @param[in] quantization_info Quantization info
644 * @return True if the quantization info has been changed.
646 bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info);
648 /** Helper function to calculate the Valid Region for Scale.
650 * @param[in] src_info Input tensor info used to check.
651 * @param[in] dst_shape Shape of the output.
652 * @param[in] interpolate_policy Interpolation policy.
653 * @param[in] sampling_policy Sampling policy.
654 * @param[in] border_undefined True if the border is undefined.
656 * @return The corresponding valid region
658 ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape,
659 InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined);
661 /** Convert a linear index into n-dimensional coordinates.
663 * @param[in] shape Shape of the n-dimensional tensor.
664 * @param[in] index Linear index specifying the i-th element.
666 * @return n-dimensional coordinates.
668 inline Coordinates index2coords(const TensorShape &shape, int index);
670 /** Convert n-dimensional coordinates into a linear index.
672 * @param[in] shape Shape of the n-dimensional tensor.
673 * @param[in] coord N-dimensional coordinates.
675 * @return linear index
677 inline int coords2index(const TensorShape &shape, const Coordinates &coord);
679 /** Get the index of the given dimension.
681 * @param[in] data_layout The data layout.
682 * @param[in] data_layout_dimension The dimension which this index is requested for.
684 * @return The int conversion of the requested data layout index.
686 inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension);
687 } // namespace arm_compute
689 #include "arm_compute/core/Helpers.inl"
690 #endif /*__ARM_COMPUTE_HELPERS_H__ */