inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl

   1 // Copyright (c) 2016-2017 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15
  16 #include "include/include_all.cl"
  17
  18
  19 /****************************************************************************
  20  *                                                                          *
  21  *                               Utility Defines                            *
  22  *                                                                          *
  23  ***************************************************************************/
  24
  25 // Each RoI is described by 5 elements, the first one being unused. This is
  26 // required for the kernel to have the same API as other implmentations.
  27 #define ROI_NUM_ELEMENTS 5
  28
  29 #define SRC_W INPUT0_SIZE_X
  30 #define SRC_H INPUT0_SIZE_Y
  31 #define DST_W POOLED_WIDTH
  32 #define DST_H POOLED_HEIGHT
  33 #define PITCH_ROI_R INPUT1_BATCH_PITCH
  34
  35 #if GROUP_SIZE == 0
  36 #define DST_C INPUT0_FEATURE_NUM
  37 #else
  38 #define DST_C (GROUP_SIZE ? (INPUT0_FEATURE_NUM / GROUP_SIZE / GROUP_SIZE) : INPUT0_FEATURE_NUM)
  39 #endif
  40
  41 // Note: In the non-ROI_OLD case we keep the coordinates in float instead
  42 //       of using UNIT_TYPE, since with FP16 we might actually lose some
  43 //       precision in the coordinates, given a sufficiently large W or H.
  44 #define COORD_T float
  45 #define ACCUM_T float
  46
  47 #define MIN(a,b) ((a) < (b) ? (a) : (b))
  48 #define MAX(a,b) ((a) > (b) ? (a) : (b))
  49 #define CLAMP(v,l,u) MAX((l),MIN((v),(u)))
  50
  51 #if INPUT1_FEATURE_NUM != ROI_NUM_ELEMENTS
  52 #error - unknown ROI_POOLING kernel type
  53 #endif
  54
  55 /****************************************************************************
  56  *                                                                          *
  57  *                                RoI Pooling                               *
  58  *                                                                          *
  59  ***************************************************************************/
  60
  61 KERNEL(roi_pooling_gpu)
  62 (
  63     const __global UNIT_TYPE * src_data,
  64     __global UNIT_TYPE * dst_data,
  65     const __global UNIT_TYPE * src_rois
  66 )
  67 {
  68     const size_t i = get_global_id(0);
  69
  70     const uint x = i % DST_W;
  71     const uint y = i / DST_W % DST_H;
  72     const uint c = i / DST_W / DST_H % DST_C;
  73     const uint r = i / DST_W / DST_H / DST_C % OUTPUT_ROI_NUM;
  74     // const uint b = i / DST_W / DST_H / DST_C / OUTPUT_ROI_NUM; - TODO: support batching correctly
  75     // Note: The rounding of the coordinates is done prior to the mul
  76     //       with SPATIAL_SCALE: It makes sense since the resolution of
  77     //       the pooled data is limited by its dimensions. (Is this clear?)
  78
  79     const __global UNIT_TYPE * roi_ptr = &src_rois[PITCH_ROI_R * r];
  80 #if USE_OLD_SCALE_AND_ROUNDING
  81     const int roi_x  = round(roi_ptr[1] * SPATIAL_SCALE);
  82     const int roi_y  = round(roi_ptr[2] * SPATIAL_SCALE);
  83     const int roi_x1 = round(roi_ptr[3] * SPATIAL_SCALE);
  84     const int roi_y1 = round(roi_ptr[4] * SPATIAL_SCALE);
  85
  86     // The final coordinate is within the ROI and malformed dimensions are treated as 1
  87     const uint roi_w = max(roi_x1 - roi_x, 0) + 1;
  88     const uint roi_h = max(roi_y1 - roi_y, 0) + 1;
  89 #else
  90     const COORD_T roi_x  = (COORD_T)(round(roi_ptr[1]) + 0.f) * SPATIAL_SCALE;
  91     const COORD_T roi_y  = (COORD_T)(round(roi_ptr[2]) + 0.f) * SPATIAL_SCALE;
  92     const COORD_T roi_x1 = (COORD_T)(round(roi_ptr[3]) + 1.f) * SPATIAL_SCALE;
  93     const COORD_T roi_y1 = (COORD_T)(round(roi_ptr[4]) + 1.f) * SPATIAL_SCALE;
  94
  95     // The final coordinate is within the ROI and malformed dimensions are treated as 1
  96     const COORD_T roi_w = max(roi_x1 - roi_x, .1f);
  97     const COORD_T roi_h = max(roi_y1 - roi_y, .1f);
  98 #endif
  99
 100     // Note that when the "after" is rounded rounded up else we get the last cell,
 101     // instead of the cell beyond (For "symmetry").
 102     //
 103     // For ex. with src being a 6 cell row and dest being a 4 cell one:
 104     // >>> [((x + 0) * 6) // 4 for x in [0, 1, 2, 3]]   # "begin" values
 105     // [0, 1, 3, 4]                                     # as expected
 106     // >>> [((x + 1) * 6) // 4 for x in [0, 1, 2, 3]]   # "after" values
 107     // [1, 3, 4 ,6]                                     # [2, 3, 5, 6] expected!
 108 #if USE_OLD_SCALE_AND_ROUNDING
 109     const int dx_begin = ((x + 0) * roi_w) / DST_W;
 110     const int dy_begin = ((y + 0) * roi_h) / DST_H;
 111     const int dx_after = ((x + 1) * roi_w + (DST_W - 1)) / DST_W;
 112     const int dy_after = ((y + 1) * roi_h + (DST_H - 1)) / DST_H;
 113
 114     // clamp in case roi_x or roi_y were unreasonable
 115     const int x_begin = clamp(roi_x + dx_begin, 0, SRC_W);
 116     const int y_begin = clamp(roi_y + dy_begin, 0, SRC_H);
 117     const int x_after = clamp(roi_x + dx_after, 0, SRC_W);
 118     const int y_after = clamp(roi_y + dy_after, 0, SRC_H);
 119 #else
 120     const COORD_T dx_begin = (x + 0) * (COORD_T)(roi_w / DST_W);
 121     const COORD_T dy_begin = (y + 0) * (COORD_T)(roi_h / DST_H);
 122     const COORD_T dx_after = (x + 1) * (COORD_T)(roi_w / DST_W);
 123     const COORD_T dy_after = (y + 1) * (COORD_T)(roi_h / DST_H);
 124
 125     // clamp in case roi_x or roi_y were unreasonable
 126     const int x_begin = CLAMP(floor(roi_x + dx_begin), 0, SRC_W);
 127     const int y_begin = CLAMP(floor(roi_y + dy_begin), 0, SRC_H);
 128     const int x_after = CLAMP(ceil(roi_x + dx_after), 0, SRC_W);
 129     const int y_after = CLAMP(ceil(roi_y + dy_after), 0, SRC_H);
 130 #endif
 131
 132 #if GROUP_SIZE == 0
 133     const uint work_c = c;
 134 #else
 135
 136 #if 0
 137     const COORD_T group_bin_w = (COORD_T)roi_w / DST_W;
 138     const COORD_T group_bin_h = (COORD_T)roi_h / DST_H;
 139
 140     const uint group_x = CLAMP(x * group_bin_w, 0, GROUP_SIZE - 1);
 141     const uint group_y = CLAMP(y * group_bin_h, 0, GROUP_SIZE - 1);
 142 #else
 143     const uint group_x = x;
 144     const uint group_y = y;
 145 #endif
 146
 147     const uint work_c = group_x + GROUP_SIZE * (group_y + GROUP_SIZE * c);
 148 #endif
 149
 150     const __global UNIT_TYPE * data = src_data + INPUT0_OFFSET + INPUT0_FEATURE_PITCH*work_c;
 151
 152     ACCUM_T res = MAX_POOL && x_begin < x_after && y_begin < y_after ? UNIT_VAL_MIN : 0;
 153
 154     for (int yy = y_begin; yy < y_after; ++yy)
 155     for (int xx = x_begin; xx < x_after; ++xx)
 156     {
 157         UNIT_TYPE val = data[xx*INPUT0_X_PITCH + yy*INPUT0_Y_PITCH];
 158
 159         res = MAX_POOL ? MAX(res, (ACCUM_T)val) : res + (ACCUM_T)val;
 160     }
 161
 162     if (!MAX_POOL)
 163     {
 164         //TODO(ruv): again, differs from the standard fixed size area (?)
 165         const COORD_T area = (y_after - y_begin) * (x_after - x_begin);
 166         if (area) res /= area;
 167     }
 168
 169     const uint output_offset = OUTPUT_OFFSET + x*OUTPUT_X_PITCH + y*OUTPUT_Y_PITCH + c*OUTPUT_FEATURE_PITCH + r*OUTPUT_ROI_PITCH;
 170     dst_data[output_offset] = ACTIVATION((UNIT_TYPE)res, NL_M, NL_N);
 171 }