inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pyramid_roi_align_gpu_ref.cl

   1 // Copyright (c) 2018 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "include/include_all.cl"
  16
  17 #define META_OFFSET_X                   4
  18 #define META_OFFSET_Y                   5
  19
  20 #define SIZE_TAB_PARAMETERS             4
  21
  22 struct Parameters
  23 {
  24     int h_source, w_source, f_Size, x_Size, y_Size, offset;
  25 };
  26
  27 __constant struct Parameters parameters [SIZE_TAB_PARAMETERS] =
  28         {
  29             { INPUT2_SIZE_Y, INPUT2_SIZE_X, INPUT2_FEATURE_PITCH, INPUT2_X_PITCH, INPUT2_Y_PITCH, INPUT2_OFFSET },
  30             { INPUT3_SIZE_Y, INPUT3_SIZE_X, INPUT3_FEATURE_PITCH, INPUT3_X_PITCH, INPUT3_Y_PITCH, INPUT3_OFFSET },
  31             { INPUT4_SIZE_Y, INPUT4_SIZE_X, INPUT4_FEATURE_PITCH, INPUT4_X_PITCH, INPUT4_Y_PITCH, INPUT4_OFFSET },
  32             { INPUT5_SIZE_Y, INPUT5_SIZE_X, INPUT5_FEATURE_PITCH, INPUT5_X_PITCH, INPUT5_Y_PITCH, INPUT5_OFFSET }
  33         };
  34
  35
  36 KERNEL(pyramidROIAlign_gpu_ref)(
  37     const __global INPUT0_TYPE *boxes,
  38     const __global INPUT1_TYPE *image_meta,
  39     const __global INPUT2_TYPE *P2,
  40     const __global INPUT3_TYPE *P3,
  41     const __global INPUT4_TYPE *P4,
  42     const __global INPUT5_TYPE *P5,
  43     const __global INPUT6_TYPE *dim,
  44     __global OUTPUT_TYPE *output)
  45 {
  46     // [CONSTEXPR]:
  47     const uint kerNum = (uint) get_global_id(0);
  48
  49     const __global float *feature_map_Ptr[SIZE_TAB_PARAMETERS];
  50     int f_Size;
  51
  52     INPUT1_TYPE img_dim_X = image_meta[GET_DATA_INDEX(INPUT1, 0, 0, 0, META_OFFSET_X)];
  53     INPUT1_TYPE img_dim_Y = image_meta[GET_DATA_INDEX(INPUT1, 0, 0, 0, META_OFFSET_Y)];
  54
  55     INPUT1_TYPE image_area = img_dim_X * img_dim_Y;
  56     INPUT1_TYPE scale = sqrt(image_area) / 224.0;
  57
  58     INPUT0_TYPE hU = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 2)];
  59     INPUT0_TYPE hL = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 0)];
  60     INPUT0_TYPE h = hU - hL;
  61     INPUT0_TYPE wU = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 3)];
  62     INPUT0_TYPE wL = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 1)];
  63     INPUT0_TYPE w = wU - wL;
  64
  65     int roi_level = (int)round(log2(sqrt(h*w) * scale));
  66
  67     // 0 <= roi_level <= 3
  68     roi_level = min(3, max(0, 2 + roi_level));
  69
  70     feature_map_Ptr[0] = P2;
  71     feature_map_Ptr[1] = P3;
  72     feature_map_Ptr[2] = P4;
  73     feature_map_Ptr[3] = P5;
  74
  75     f_Size = parameters[roi_level].f_Size;
  76
  77     //calculate cooficients for transformation
  78     INPUT0_TYPE y1 = hL * (parameters[roi_level].h_source - 1);
  79     INPUT0_TYPE x1 = wL * (parameters[roi_level].w_source - 1);
  80     INPUT0_TYPE y2 = hU * (parameters[roi_level].h_source - 1);
  81     INPUT0_TYPE x2 = wU * (parameters[roi_level].w_source - 1);
  82     INPUT0_TYPE deltaX = (x2 - x1) / (OUTPUT_SIZE_X - 1);
  83     INPUT0_TYPE deltaY = (y2 - y1) / (OUTPUT_SIZE_Y - 1);
  84     INPUT0_TYPE y = y1;
  85
  86    //transformation
  87     for (int i = 0; i < OUTPUT_SIZE_Y; ++i) //loop by 'y' coordinate
  88     {
  89         int ya = (int)floor(y);
  90         int yb = (int)ceil(y);
  91
  92         if (ya < 0) ya = 0;
  93         if (yb >= parameters[roi_level].h_source) yb = parameters[roi_level].h_source - 1;
  94         if (yb - ya == 0)
  95         {
  96             if (yb + 2 < parameters[roi_level].h_source) ++yb;
  97             else --ya;
  98         }
  99
 100         INPUT0_TYPE x = x1;
 101
 102         for (int j = 0; j < OUTPUT_SIZE_X; ++j) //loop by 'x' coordinate
 103         {
 104             int xa = (int)floor(x);
 105             int xb = (int)ceil(x);
 106             if (xa < 0) xa = 0;
 107             if (xb >= parameters[roi_level].w_source) xb = parameters[roi_level].w_source - 1;
 108             if (xb - xa == 0)
 109             {
 110                 if (xb + 2 < parameters[roi_level].w_source) ++xb;
 111                 else --xa;
 112             }
 113
 114     /* BILINEAR TRANSFORMATION
 115          (xa,yb,f3)*---------------------------------*(xb,yb,f2)
 116                    |                                 |
 117                    |          *(x,y)                 |
 118                    |                                 |
 119          (xa,ya,f0)*---------------------------------*(xb,ya,f1)
 120    */
 121             //cooficients for bilinear transformation
 122             INPUT0_TYPE a = yb - y;
 123             INPUT0_TYPE b = y - ya;
 124             INPUT0_TYPE c = xb - x;
 125             INPUT0_TYPE d = x - xa;
 126
 127             /*#define GET_DATA_INDEX(prefix, b, f, y, x)  \
 128                 CAT(prefix, _OFFSET) +                  \
 129                 (x)*CAT(prefix, _X_PITCH) +             \
 130                 (y)*CAT(prefix, _Y_PITCH) +             \
 131                 (f)*CAT(prefix, _FEATURE_PITCH) +       \
 132                 (b)*CAT(prefix, _BATCH_PITCH)
 133
 134             For P2, P3, P4, P5 batch size is always 0 */
 135
 136             size_t f0Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * ya + parameters[roi_level].x_Size * xa;
 137             size_t f1Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * ya + parameters[roi_level].x_Size * xb;
 138             size_t f2Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * yb + parameters[roi_level].x_Size * xb;
 139             size_t f3Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * yb + parameters[roi_level].x_Size * xa;
 140             size_t ind_out = OUTPUT_OFFSET + i * OUTPUT_Y_PITCH + j * OUTPUT_X_PITCH + kerNum * OUTPUT_BATCH_PITCH;
 141
 142             for (int k = 0; k < OUTPUT_FEATURE_NUM; ++k) //transformation for every feature
 143             {
 144                 INPUT0_TYPE f0 = feature_map_Ptr[roi_level][k * f_Size + f0Ind];
 145                 INPUT0_TYPE f1 = feature_map_Ptr[roi_level][k * f_Size + f1Ind];
 146                 INPUT0_TYPE f2 = feature_map_Ptr[roi_level][k * f_Size + f2Ind];
 147                 INPUT0_TYPE f3 = feature_map_Ptr[roi_level][k * f_Size + f3Ind];
 148
 149                 INPUT0_TYPE f03 = f3 * b + f0 * a;
 150                 INPUT0_TYPE f12 = f2 * b + f1 * a;
 151                 INPUT0_TYPE f = f03 * c + f12 * d;
 152
 153                 output[k * OUTPUT_FEATURE_PITCH + ind_out] = f;
 154             }
 155             x += deltaX;
 156         }
 157         y += deltaY;
 158     }
 159 }