src/core/CL/cl_kernels/convolution_layer.cl

   1 /*
   2  * Copyright (c) 2017-2018 ARM Limited.
   3  *
   4  * SPDX-License-Identifier: MIT
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #include "helpers.h"
  25
  26 #if defined(FIXED_POINT_POSITION)
  27 #include "fixed_point.h"
  28 #endif // FIXED_POINT_POSITION
  29
  30 #if defined(DATA_TYPE)
  31 /** This kernel reshapes the tensor's low three dimensions to single column
  32  *
  33  * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
  34  *
  35  * @param[in]  src_ptr                            Pointer to the source tensor. Supported data types: F16/F32
  36  * @param[in]  src_stride_x                       Stride of the source tensor in X dimension (in bytes)
  37  * @param[in]  src_step_x                         src_stride_x * number of elements along X processed per workitem(in bytes)
  38  * @param[in]  src_stride_y                       Stride of the source tensor in Y dimension (in bytes)
  39  * @param[in]  src_step_y                         src_stride_y * number of elements along Y processed per workitem(in bytes)
  40  * @param[in]  src_stride_z                       Stride of the source tensor in Z dimension (in bytes)
  41  * @param[in]  src_step_z                         src_stride_z * number of elements along Y processed per workitem(in bytes)
  42  * @param[in]  src_offset_first_element_in_bytes  The offset of the first element in the source tensor
  43  * @param[out] dst_ptr                            Pointer to the destination tensor. Same as @p src_ptr
  44  * @param[in]  dst_stride_x                       Stride of the destination tensor in X dimension (in bytes)
  45  * @param[in]  dst_step_x                         dst_stride_x * number of elements along X processed per workitem(in bytes)
  46  * @param[in]  dst_stride_y                       Stride of the destination tensor in Y dimension (in bytes)
  47  * @param[in]  dst_step_y                         dst_stride_y * number of elements along Y processed per workitem(in bytes)
  48  * @param[in]  dst_offset_first_element_in_bytes  The offset of the first element in the destination tensor
  49  * @param[in]  bias_ptr                           Pointer to the bias tensor. Same as @p src_ptr
  50  * @param[in]  bias_stride_x                      Stride of the bias tensor in X dimension (in bytes)
  51  * @param[in]  bias_step_x                        bias_stride_x * number of elements along X processed per workitem(in bytes)
  52  * @param[in]  bias_offset_first_element_in_bytes The offset of the first element in the source tensor
  53  * @param[in]  width                              The width of the input tensor
  54  * @param[in]  height                             The height of the input tensor
  55  * @param[in]  depth                              The depth of the input tensor
  56  * @param[in]  total_filters                      Total number of filters. 4th dimension of the weights matrix
  57  */
  58 __kernel void reshape_to_columns(
  59     TENSOR3D_DECLARATION(src),
  60     IMAGE_DECLARATION(dst),
  61 #ifdef HAS_BIAS
  62     VECTOR_DECLARATION(bias),
  63 #endif /* HAS_BIAS */
  64     uint width, uint height, uint depth, uint total_filters)
  65 {
  66     Tensor3D src            = CONVERT_TO_TENSOR3D_STRUCT(src);
  67     bool     is_last_thread = (get_global_id(0) == (get_global_size(0) - 1) && get_global_id(1) == (get_global_size(1) - 1) && get_global_id(2) == (get_global_size(2) - 1));
  68
  69     __global uchar *tmp_src_ptr = src.ptr;
  70     __global uchar *tmp_dst_ptr = dst_ptr + dst_offset_first_element_in_bytes + get_global_id(0) * dst_stride_y + get_global_id(1) * width * dst_stride_y + get_global_id(
  71                                       2) * width * height * dst_stride_y;
  72 #ifdef HAS_BIAS
  73     __global uchar *tmp_bias_ptr = bias_ptr + bias_offset_first_element_in_bytes;
  74 #endif /* HAS_BIAS */
  75
  76     if(is_last_thread)
  77     {
  78         for(uint i = 0; i < total_filters; ++i)
  79         {
  80             *((__global DATA_TYPE *)tmp_dst_ptr) = *((__global DATA_TYPE *)tmp_src_ptr);
  81
  82 #ifdef HAS_BIAS
  83             *((__global DATA_TYPE *)(tmp_dst_ptr + dst_stride_y)) = *((__global DATA_TYPE *)(tmp_bias_ptr));
  84             tmp_bias_ptr += bias_stride_x;
  85 #endif /* HAS_BIAS */
  86             tmp_src_ptr += depth * src_stride_z;
  87             tmp_dst_ptr += dst_stride_x;
  88         }
  89     }
  90     else
  91     {
  92         for(uint i = 0; i < total_filters; ++i)
  93         {
  94             *((__global DATA_TYPE *)tmp_dst_ptr) = *((__global DATA_TYPE *)tmp_src_ptr);
  95             tmp_src_ptr += depth * src_stride_z;
  96             tmp_dst_ptr += dst_stride_x;
  97         }
  98     }
  99 }
 100 #endif // defined(DATA_TYPE)