arm_compute/core/Helpers.h

   1 /*
   2  * Copyright (c) 2016, 2017 ARM Limited.
   3  *
   4  * SPDX-License-Identifier: MIT
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #ifndef __ARM_COMPUTE_HELPERS_H__
  25 #define __ARM_COMPUTE_HELPERS_H__
  26
  27 #include "arm_compute/core/Coordinates.h"
  28 #include "arm_compute/core/IAccessWindow.h"
  29 #include "arm_compute/core/Steps.h"
  30 #include "arm_compute/core/Strides.h"
  31 #include "arm_compute/core/TensorShape.h"
  32 #include "arm_compute/core/Window.h"
  33
  34 #include <array>
  35 #include <cstddef>
  36 #include <cstdint>
  37 #include <memory>
  38 #include <tuple>
  39 #include <type_traits>
  40 #include <utility>
  41
  42 namespace arm_compute
  43 {
  44 class IKernel;
  45 class ITensor;
  46 class TensorInfo;
  47
  48 namespace cpp14
  49 {
  50 #ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */
  51 template <class T>
  52 struct _Unique_if
  53 {
  54     typedef std::unique_ptr<T> _Single_object;
  55 };
  56
  57 template <class T>
  58 struct _Unique_if<T[]>
  59 {
  60     typedef std::unique_ptr<T[]> _Unknown_bound;
  61 };
  62
  63 template <class T, size_t N>
  64 struct _Unique_if<T[N]>
  65 {
  66     typedef void _Known_bound;
  67 };
  68
  69 template <class T, class... Args>
  70 typename _Unique_if<T>::_Single_object
  71 make_unique(Args &&... args)
  72 {
  73     return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
  74 }
  75
  76 template <class T>
  77 typename _Unique_if<T>::_Unknown_bound
  78 make_unique(size_t n)
  79 {
  80     typedef typename std::remove_extent<T>::type U;
  81     return std::unique_ptr<T>(new U[n]());
  82 }
  83
  84 template <class T, class... Args>
  85 typename _Unique_if<T>::_Known_bound
  86 make_unique(Args &&...) = delete;
  87 #endif /* DOXYGEN_SKIP_THIS */
  88 }
  89 }
  90
  91 namespace
  92 {
  93 /** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between
  94  * the real coordinates and the smallest following integer coordinates. Only declared here.
  95  *
  96  * @param[in] pixel_ptr Pointer to the top-left pixel value. Format: Single channel U8
  97  * @param[in] stride    Stride to access the bottom-left and bottom-right pixel values (NOTE(review): presumably in bytes - confirm)
  98  * @param[in] dx        Pixel's distance between the X real coordinate and the smallest X following integer
  99  * @param[in] dy        Pixel's distance between the Y real coordinate and the smallest Y following integer
 100  *
 101  * @note dx and dy must be in the range [0, 1.0]
 102  *
 103  * @return The bilinearly interpolated pixel value
 104  */
 105 inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy);
 106
 107 /** Return the pixel at (x,y) using bilinear interpolation. The image must be single channel U8
 108  *
 109  * @warning Only works for IImage data (NOTE(review): the original warning referred to an iterator, but this function takes a raw pointer and stride - confirm)
 110  *
 111  * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image.
 112  * @param[in] stride          Stride in bytes of the image
 113  * @param[in] x               X position of the wanted pixel
 114  * @param[in] y               Y position of the wanted pixel
 115  *
 116  * @return The pixel at (x, y) using bilinear interpolation.
 117  */
 118 inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y);
 119
 120 /** Return the pixel at (x,y) using bilinear interpolation, clamping when out of borders. The image must be single channel U8
 121  *
 122  * @warning Only works for IImage data (NOTE(review): the original warning referred to an iterator, but this function takes a raw pointer and stride - confirm)
 123  *
 124  * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image.
 125  * @param[in] stride          Stride in bytes of the image
 126  * @param[in] width           Width of the image
 127  * @param[in] height          Height of the image
 128  * @param[in] x               X position of the wanted pixel
 129  * @param[in] y               Y position of the wanted pixel
 130  *
 131  * @return The pixel at (x, y) using bilinear interpolation.
 132  */
 133 inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y);
 134
 135 /** Return the pixel at (x,y) using area interpolation, clamping when out of borders. The image must be single channel U8
 136  *
 137  * @note The interpolation area depends on the width and height ratio of the input and output images
 138  * @note Currently the average of the contributing pixels is calculated
 139  *
 140  * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image.
 141  * @param[in] stride          Stride in bytes of the image
 142  * @param[in] width           Width of the image
 143  * @param[in] height          Height of the image
 144  * @param[in] wr              Ratio between the input image width and the output image width.
 145  * @param[in] hr              Ratio between the input image height and the output image height.
 146  * @param[in] x               X position of the wanted pixel (integer, unlike the bilinear variants)
 147  * @param[in] y               Y position of the wanted pixel (integer, unlike the bilinear variants)
 148  *
 149  * @return The pixel at (x, y) using area interpolation.
 150  */
 151 inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y);
 152
 153 /** Performs clamping among a lower and upper value.
 154  *
 155  * @param[in] n     Value to clamp.
 156  * @param[in] lower Lower threshold.
 157  * @param[in] upper Upper threshold.
 158  *
 159  *  @return Clamped value.
 160  */
 161 template <typename T>
 162 inline T clamp(const T &n, const T &lower, const T &upper)
 163 {
 164     return std::max(lower, std::min(n, upper));
 165 }
 166
 167 /** Base case of for_each. Does nothing. */
 168 template <typename F>
 169 inline void for_each(F &&)
 170 {
 171 }
 172
 173 /** Call the function for each of the arguments
 174  *
 175  * @param[in] func Function to be called
 176  * @param[in] arg  Argument passed to the function
 177  * @param[in] args Remaining arguments
 178  */
 179 template <typename F, typename T, typename... Ts>
 180 inline void for_each(F &&func, T &&arg, Ts &&... args)
 181 {
 182     func(arg);
 183     for_each(func, args...);
 184 }
 185
 186 /** Base case of foldl.
 187  *
 188  * @return value.
 189  */
 190 template <typename F, typename T>
 191 inline T foldl(F &&, const T &value)
 192 {
 193     return value;
 194 }
 195
 196 /** Base case of foldl.
 197  *
 198  * @return Function evaluation for value1 and value2
 199  */
 200 template <typename F, typename T, typename U>
 201 inline auto foldl(F &&func, T &&value1, U &&value2) -> decltype(func(value1, value2))
 202 {
 203     return func(value1, value2);
 204 }
 205
 206 /** Fold left.
 207  *
 208  * @param[in] func    Function to be called
 209  * @param[in] initial Initial value
 210  * @param[in] value   Argument passed to the function
 211  * @param[in] values  Remaining arguments
 212  */
 213 template <typename F, typename I, typename T, typename... Vs>
 214 inline I foldl(F &&func, I &&initial, T &&value, Vs &&... values)
 215 {
 216     return foldl(std::forward<F>(func), func(std::forward<I>(initial), std::forward<T>(value)), std::forward<Vs>(values)...);
 217 }
 218 }
 219
 220 namespace arm_compute
 221 {
 222 /** Iterator updated by @ref execute_window_loop for each window element. Member functions are only declared here. */
 223 class Iterator
 224 {
 225 public:
 226     /** Default constructor to create an empty iterator */
 227     constexpr Iterator();
 228     /** Create a container iterator for the metadata and allocation contained in the ITensor
 229      *
 230      * @param[in] tensor The tensor to associate to the iterator.
 231      * @param[in] window The window which will be used to iterate over the tensor.
 232      */
 233     Iterator(const ITensor *tensor, const Window &window);
 234
 235     /** Increment the iterator along the specified dimension by the step value associated to that dimension.
 236      *
 237      * @warning It is the caller's responsibility to call increment(dimension+1) when reaching the end of a dimension, the iterator will not check for overflow.
 238      *
 239      * @note When incrementing a dimension 'n' the coordinates of all the dimensions in the range (0,n-1) are reset. For example if you iterate over a 2D image, every time you change row (dimension 1), the iterator for the width (dimension 0) is reset to its start.
 240      *
 241      * @param[in] dimension Dimension to increment
 242      */
 243     void increment(size_t dimension);
 244
 245     /** Return the offset in bytes from the first element to the current position of the iterator
 246      *
 247      * @return The current position of the iterator in bytes relative to the first element.
 248      */
 249     constexpr int offset() const;
 250
 251     /** Return a pointer to the current pixel.
 252      *
 253      * @warning Only works if the iterator was created with an ITensor.
 254      *
 255      * @return equivalent to buffer() + offset()
 256      */
 257     constexpr uint8_t *ptr() const;
 258
 259     /** Move the iterator back to the beginning of the specified dimension.
 260      *
 261      * @param[in] dimension Dimension to reset
 262      */
 263     void reset(size_t dimension);
 264
 265 private:
 266     uint8_t *_ptr; /* NOTE(review): presumably the base pointer of the tensor buffer - confirm in Helpers.inl */
 267
 268     /** Per-dimension iteration state; both fields are zero-initialised by the constructor. */
 269     class Dimension
 270     {
 271     public:
 272         constexpr Dimension()
 273             : _dim_start(0), _stride(0)
 274         {
 275         }
 276
 277         int _dim_start; /* NOTE(review): presumably the offset at which this dimension currently starts - confirm in Helpers.inl */
 278         int _stride;    /* NOTE(review): presumably the amount added per increment() of this dimension - confirm in Helpers.inl */
 279     };
 280
 281     std::array<Dimension, Coordinates::num_max_dimensions> _dims; /* One entry per possible dimension */
 282 };
 282
 283 /** Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
 284  *  It passes the coordinates of the current element to the lambda_function for each iteration
 285  *
 286  * @param[in]     w               Window to iterate through.
 287  * @param[in]     lambda_function The function of type void(function)( const Coordinates & id ) to call at each iteration.
 288  *                                Where id represents the absolute coordinates of the item to process.
 289  * @param[in,out] iterators       Tensor iterators which will be updated by this function before calling lambda_function.
 290  */
 291 template <typename L, typename... Ts>
 292 inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators);
 293
 294 /** Update window and padding size for each of the access patterns.
 295  *
 296  * First the window size is reduced based on all access patterns that are not
 297  * allowed to modify the padding of the underlying tensor. Then the padding of
 298  * the remaining tensors is increased to match the window.
 299  *
 300  * @param[in] win      Window that is used by the kernel.
 301  * @param[in] patterns Access patterns used to calculate the final window and padding.
 302  *
 303  * @return True if the window has been changed. Changes to the padding do not
 304  *         influence the returned value.
 305  */
 306 template <typename... Ts>
 307 bool update_window_and_padding(Window &win, Ts &&... patterns)
 308 {
 309     bool window_changed = false;
 310
 311     for_each([&](const IAccessWindow & w)
 312     {
 313         window_changed |= w.update_window_if_needed(win);
 314     },
 315     patterns...);
 316
 317     bool padding_changed = false;
 318
 319     for_each([&](const IAccessWindow & w)
 320     {
 321         padding_changed |= w.update_padding_if_needed(win);
 322     },
 323     patterns...);
 324
 325     return window_changed;
 326 }
 327
 328 /** Calculate the maximum window for a given tensor shape and border setting
 329  *
 330  * @param[in] info        Tensor info object defining the shape of the object for which the window is created.
 331  * @param[in] steps       (Optional) Number of elements processed for each step. Defaults to a default-constructed Steps.
 332  * @param[in] skip_border (Optional) If true exclude the border region from the window. Defaults to false.
 333  * @param[in] border_size (Optional) Border size. Defaults to a default-constructed BorderSize.
 334  *
 335  * @return The maximum window the kernel can be executed on.
 336  */
 337 Window calculate_max_window(const TensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
 338
 339 /** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
 340  *
 341  * @param[in] info        Tensor info object defining the shape of the object for which the window is created.
 342  * @param[in] steps       (Optional) Number of elements processed for each step. Defaults to a default-constructed Steps.
 343  * @param[in] skip_border (Optional) If true exclude the border region from the window. Defaults to false.
 344  * @param[in] border_size (Optional) Border size. The border region will be excluded from the window. Defaults to a default-constructed BorderSize.
 345  *
 346  * @return The maximum window the kernel can be executed on.
 347  */
 348 Window calculate_max_window_horizontal(const TensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
 349
 350 /** Intersect multiple valid regions.
 351  *
 352  * @param[in] regions Valid regions.
 353  *
 354  * @return Intersection of all regions.
 355  */
 356 template <typename... Ts>
 357 ValidRegion intersect_valid_regions(Ts &&... regions)
 358 {
 359     auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion
 360     {
 361         ValidRegion region;
 362
 363         for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d)
 364         {
 365             region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d]));
 366         }
 367
 368         for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d)
 369         {
 370             region.shape.set(d, std::min(r1.shape[d], r2.shape[d]));
 371         }
 372
 373         return region;
 374     };
 375
 376     return foldl(intersect, std::forward<Ts>(regions)...);
 377 }
 378
 379 /** Create a strides object based on the provided strides and the tensor dimensions.
 380  *
 381  * @param[in] info          Tensor info object providing the shape of the tensor for unspecified strides.
 382  * @param[in] stride_x      Stride to be used in X dimension (in bytes).
 383  * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes).
 384  *
 385  * @return Strides object based on the specified strides. Missing strides are
 386  *         calculated based on the tensor shape and the strides of lower dimensions.
 387  */
 388 template <typename T, typename... Ts>
 389 inline Strides compute_strides(const TensorInfo &info, T stride_x, Ts &&... fixed_strides)
 390 {
 391     const TensorShape &shape = info.tensor_shape();
 392
 393     // Create strides object
 394     Strides strides(stride_x, fixed_strides...);
 395
 396     for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
 397     {
 398         strides.set(i, shape[i - 1] * strides[i - 1]);
 399     }
 400
 401     return strides;
 402 }
 403
 404 /** Create a strides object based on the tensor dimensions.
 405  *
 406  * @param[in] info Tensor info object used to compute the strides.
 407  *
 408  * @return Strides object based on element size and tensor shape.
 409  */
 410 template <typename... Ts>
 411 inline Strides compute_strides(const TensorInfo &info)
 412 {
 413     return compute_strides(info, info.element_size());
 414 }
 415 }
 416
 417 #include "arm_compute/core/Helpers.inl"
 418 #endif /*__ARM_COMPUTE_HELPERS_H__ */