inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_b_fs_yx_fsv4.cl

   1 // Copyright (c) 2019 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15
  16 #include "include/include_all.cl"
  17
  18 #if MAX_POOLING
  19     #define INIT_VAL CHAR_MIN
  20 #elif AVG_POOLING
  21     #define INIT_VAL 0
  22 #else
  23 #error
  24 #endif
  25
  26
  27 inline int FUNC(apply_pooling)(int tmp, int in)
  28 {
  29 #if MAX_POOLING
  30     return max(tmp, in);
  31 #elif AVG_POOLING
  32     return tmp + in;
  33 #endif
  34 }
  35
  36 KERNEL(pooling_gpu_b_fs_yx_fsv4)(
  37     const __global UNIT_TYPE* input,
  38     __global UNIT_TYPE* output)
  39 {
  40     const uint x    = (uint)get_global_id(0);
  41     const uint y    = (uint)get_global_id(1);
  42     const uint bf   = (uint)get_global_id(2);
  43     const uint f    = (bf * 4) % INPUT0_FEATURE_NUM;
  44     const uint b    = (bf * 4) / INPUT0_FEATURE_NUM;
  45
  46     const int offset_x = (int)x*STRIDE_SIZE_X - PADDING_SIZE_X;
  47     const int offset_y = (int)y*STRIDE_SIZE_Y - PADDING_SIZE_Y;
  48
  49     int result[4] = { INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL };
  50
  51 #ifdef CHECK_BOUNDRY
  52     if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
  53         offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y)
  54     {
  55         return;
  56     }
  57
  58 #ifdef DYNAMIC_KERNEL_DIVIDER
  59     uint num_elementes = 0;
  60 #endif
  61
  62     const uint batch_and_feature_offset = GET_DATA_B_FS_YX_FSV4_INDEX(INPUT0, b, f, 0, 0);
  63     for(uint j = 0; j < POOL_SIZE_Y; j++)
  64     {
  65         int input_offset_y = offset_y + j;
  66         bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
  67         if(!zero_y)
  68         {
  69             for(uint i = 0; i < POOL_SIZE_X; i++)
  70             {
  71                 int input_offset_x = offset_x + i;
  72                 bool zero = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
  73                 if(!zero)
  74                 {
  75                     const uint input_idx = batch_and_feature_offset + input_offset_y*IN_Y_PITCH + input_offset_x*IN_X_PITCH;
  76
  77                     int int_data   = *((const __global int*)(input + input_idx));
  78                     char4 ch4_data = as_char4(int_data);
  79                     result[0] = FUNC_CALL(apply_pooling)(result[0], (int)ch4_data[0]);
  80                     result[1] = FUNC_CALL(apply_pooling)(result[1], (int)ch4_data[1]);
  81                     result[2] = FUNC_CALL(apply_pooling)(result[2], (int)ch4_data[2]);
  82                     result[3] = FUNC_CALL(apply_pooling)(result[3], (int)ch4_data[3]);
  83
  84 #ifdef DYNAMIC_KERNEL_DIVIDER
  85                     num_elementes++;
  86 #endif
  87                 }
  88             }
  89         }
  90     }
  91 #ifdef DYNAMIC_WITH_PADDING_KERNEL_DIVIDER
  92     const int hend = min(offset_y + POOL_SIZE_Y, INPUT0_SIZE_Y + PADDING_SIZE_Y);
  93     const int wend = min(offset_x + POOL_SIZE_X, INPUT0_SIZE_X + PADDING_SIZE_X);
  94     const uint num_elementes = (hend - offset_y) * (wend - offset_x);
  95 #endif
  96 #else // !CHECK_BOUNDRY
  97     uint input_idx = GET_DATA_B_FS_YX_FSV4_INDEX(INPUT0, b, f, offset_y, offset_x);
  98
  99     for(uint j = 0; j < POOL_SIZE_Y; j++)
 100     {
 101         for(uint i = 0; i < POOL_SIZE_X; i++)
 102         {
 103             int int_data   = *((const __global int*)(input + input_idx));
 104             char4 ch4_data = as_char4(int_data);
 105             result[0] = FUNC_CALL(apply_pooling)(result[0], (int)ch4_data[0]);
 106             result[1] = FUNC_CALL(apply_pooling)(result[1], (int)ch4_data[1]);
 107             result[2] = FUNC_CALL(apply_pooling)(result[2], (int)ch4_data[2]);
 108             result[3] = FUNC_CALL(apply_pooling)(result[3], (int)ch4_data[3]);
 109
 110             input_idx += IN_X_PITCH;
 111         }
 112         input_idx += (IN_Y_PITCH - POOL_SIZE_X*IN_X_PITCH);
 113     }
 114
 115 #if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER)
 116     const uint num_elementes = POOL_SIZE_X*POOL_SIZE_Y;
 117 #endif
 118 #endif
 119
 120 #if defined AVG_POOLING
 121     #if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER)
 122         for(uint i = 0; i < 4; i++)
 123         {
 124             result[i] = convert_int(round(((float)result[i] / max(num_elementes, (uint)1)));
 125         }
 126     #else
 127         for(uint i = 0; i < 4; i++)
 128         {
 129             result[i] = convert_int(round((float)result[i] / (int)(POOL_SIZE_Y * POOL_SIZE_X)));
 130         }
 131     #endif
 132 #endif
 133
 134     char4 char_res;
 135     for(uint op = 0; op < 4; op++)
 136     {
 137         char_res[op] = ACTIVATION(convert_char(result[op]), NL_M ,NL_N);
 138     }
 139     const uint output_pos = GET_DATA_B_FS_YX_FSV4_INDEX(OUTPUT, b, f, y, x);
 140     *((__global int*)(output + output_pos)) = as_int(char_res);
 141 }
 142
 143 #undef INIT_VAL