inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl

   1 // Copyright (c) 2016-2019 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "include/common.cl"
  16 #include "include/data_types.cl"
  17
  18
  19 /****************************************************************************
  20  *                                                                          *
  21  *                               Utility Defines                            *
  22  *                                                                          *
  23  ***************************************************************************/
  24
  25 // Each RoI is described by 5 elements, the first one being unused. This is
  26 // required for the kernel to have the same API as other implmentations.
  27 #define ROI_NUM_ELEMENTS 5
  28
  29 #define SRC_W INPUT0_SIZE_X
  30 #define SRC_H INPUT0_SIZE_Y
  31 #define DST_W POOLED_WIDTH
  32 #define DST_H POOLED_HEIGHT
  33 #define PITCH_ROI_R INPUT1_BATCH_PITCH
  34
  35 #define DST_C INPUT0_FEATURE_NUM
  36
  37 // Note: In the non-ROI_OLD case we keep the coordinates in float instead
  38 //       of using UNIT_TYPE, since with FP16 we might actually lose some
  39 //       precision in the coordinates, given a sufficiently large W or H.
  40 #define COORD_T float
  41 #define ACCUM_T float
  42
  43 #define MIN(a,b) ((a) < (b) ? (a) : (b))
  44 #define MAX(a,b) ((a) > (b) ? (a) : (b))
  45 #define CLAMP(v,l,u) MAX((l),MIN((v),(u)))
  46
  47 #if INPUT1_FEATURE_NUM != ROI_NUM_ELEMENTS
  48 #error - unknown ROI_POOLING kernel type
  49 #endif
  50
  51 KERNEL(roi_pooling_gpu)
  52 (
  53     const __global INPUT0_TYPE * src_data,
  54     __global OUTPUT_TYPE * dst_data,
  55     const __global INPUT1_TYPE * src_rois
  56 )
  57 {
  58     const size_t i = get_global_id(0);
  59
  60     const uint x = i % DST_W;
  61     const uint y = i / DST_W % DST_H;
  62     const uint c = i / DST_W / DST_H % DST_C;
  63     const uint r = i / DST_W / DST_H / DST_C % OUTPUT_ROI_NUM;
  64     // const uint b = i / DST_W / DST_H / DST_C / OUTPUT_ROI_NUM; - TODO: support batching correctly
  65     // Note: The rounding of the coordinates is done prior to the mul
  66     //       with SPATIAL_SCALE: It makes sense since the resolution of
  67     //       the pooled data is limited by its dimensions. (Is this clear?)
  68
  69     const __global INPUT1_TYPE* roi_ptr = &src_rois[PITCH_ROI_R * r];
  70
  71     const int src_batch_idx = (int)(roi_ptr[0]);
  72
  73 #if BILINEAR_POOLING
  74     const uint output_offset = OUTPUT_OFFSET + x*OUTPUT_X_PITCH + y*OUTPUT_Y_PITCH + c*OUTPUT_FEATURE_PITCH + r*OUTPUT_ROI_PITCH;
  75
  76     COORD_T roi_start_w = roi_ptr[1];
  77     COORD_T roi_start_h = roi_ptr[2];
  78     COORD_T roi_end_w   = roi_ptr[3];
  79     COORD_T roi_end_h   = roi_ptr[4];
  80
  81     COORD_T height_scale = (roi_end_h - roi_start_h) * (SRC_H - 1.0f) / (COORD_T)(POOLED_HEIGHT - 1.0f);
  82     COORD_T width_scale  = (roi_end_w - roi_start_w) * (SRC_W - 1.0f) / (COORD_T)(POOLED_WIDTH  - 1.0f);
  83
  84     COORD_T in_y = y*height_scale + roi_start_h*(COORD_T)(SRC_H - 1.0f);
  85     COORD_T in_x = x*width_scale  + roi_start_w*(COORD_T)(SRC_W - 1.0f);
  86
  87     if (in_y < 0 || in_y > (COORD_T)(SRC_H - 1) || in_x < 0 || in_x > (COORD_T)(SRC_W - 1) || src_batch_idx == -1) {
  88         dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)0, NL_M, NL_N);
  89         return;
  90     }
  91
  92     int top_y_index    = (int)(floor(in_y));
  93     int bottom_y_index = (int)(min(ceil(in_y), (COORD_T)SRC_H - 1));
  94     int left_x_index   = (int)(floor(in_x));
  95     int right_x_index  = (int)(min(ceil(in_x), (COORD_T)SRC_W - 1));
  96
  97     const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*c;
  98
  99     ACCUM_T top_left     = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + left_x_index*INPUT0_X_PITCH];
 100     ACCUM_T top_right    = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + right_x_index*INPUT0_X_PITCH];
 101     ACCUM_T bottom_left  = (ACCUM_T)data[bottom_y_index*INPUT0_Y_PITCH + left_x_index*INPUT0_X_PITCH];
 102     ACCUM_T bottom_right = (ACCUM_T)data[bottom_y_index*INPUT0_Y_PITCH + right_x_index*INPUT0_X_PITCH];
 103
 104     ACCUM_T top    = top_left + (top_right - top_left) * (in_x - left_x_index);
 105     ACCUM_T bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index);
 106
 107     ACCUM_T res = top + (bottom - top) * (in_y - top_y_index);
 108
 109     dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)res, NL_M, NL_N);
 110 #else
 111
 112     const int roi_x  = round(roi_ptr[1] * SPATIAL_SCALE);
 113     const int roi_y  = round(roi_ptr[2] * SPATIAL_SCALE);
 114     const int roi_x1 = round(roi_ptr[3] * SPATIAL_SCALE);
 115     const int roi_y1 = round(roi_ptr[4] * SPATIAL_SCALE);
 116
 117     // The final coordinate is within the ROI and malformed dimensions are treated as 1
 118     const uint roi_w = max(roi_x1 - roi_x, 0) + 1;
 119     const uint roi_h = max(roi_y1 - roi_y, 0) + 1;
 120
 121     // Note that when the "after" is rounded rounded up else we get the last cell,
 122     // instead of the cell beyond (For "symmetry").
 123     //
 124     // For ex. with src being a 6 cell row and dest being a 4 cell one:
 125     // >>> [((x + 0) * 6) // 4 for x in [0, 1, 2, 3]]   # "begin" values
 126     // [0, 1, 3, 4]                                     # as expected
 127     // >>> [((x + 1) * 6) // 4 for x in [0, 1, 2, 3]]   # "after" values
 128     // [1, 3, 4 ,6]                                     # [2, 3, 5, 6] expected!
 129     const int dx_begin = ((x + 0) * roi_w) / DST_W;
 130     const int dy_begin = ((y + 0) * roi_h) / DST_H;
 131     const int dx_after = ((x + 1) * roi_w + (DST_W - 1)) / DST_W;
 132     const int dy_after = ((y + 1) * roi_h + (DST_H - 1)) / DST_H;
 133
 134     // clamp in case roi_x or roi_y were unreasonable
 135     const int x_begin = clamp(roi_x + dx_begin, 0, SRC_W);
 136     const int y_begin = clamp(roi_y + dy_begin, 0, SRC_H);
 137     const int x_after = clamp(roi_x + dx_after, 0, SRC_W);
 138     const int y_after = clamp(roi_y + dy_after, 0, SRC_H);
 139
 140     const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*c;
 141
 142 #if MAX_POOLING
 143     ACCUM_T res = x_begin < x_after && y_begin < y_after ? -FLT_MAX : 0;
 144 #else
 145     ACCUM_T res = 0;
 146 #endif
 147
 148     for (int yy = y_begin; yy < y_after; ++yy)
 149     for (int xx = x_begin; xx < x_after; ++xx)
 150     {
 151         INPUT0_TYPE val = data[xx*INPUT0_X_PITCH + yy*INPUT0_Y_PITCH];
 152 #if MAX_POOLING
 153         res = MAX(res, (ACCUM_T)val);
 154 #else
 155         res = res + (ACCUM_T)val;
 156 #endif
 157     }
 158
 159 #if (!MAX_POOLING)
 160     {
 161         const COORD_T area = (y_after - y_begin) * (x_after - x_begin);
 162         if (area) res /= area;
 163     }
 164 #endif
 165
 166     const uint output_offset = OUTPUT_OFFSET + x*OUTPUT_X_PITCH + y*OUTPUT_Y_PITCH + c*OUTPUT_FEATURE_PITCH + r*OUTPUT_ROI_PITCH;
 167     dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)res, NL_M, NL_N);
 168 #endif
 169 }