Publishing 2019 R1 content

[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / cl_kernels / roi_pooling_ref.cl
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl

index 0c006bc..2006d57 100644 (file)
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl
@@ -1,4 +1,4 @@
-// Copyright (c) 2016-2018 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
@@ -32,11 +32,7 @@
  #define DST_H POOLED_HEIGHT
  #define PITCH_ROI_R INPUT1_BATCH_PITCH
  
-#if GROUP_SIZE == 0
  #define DST_C INPUT0_FEATURE_NUM
-#else
-#define DST_C (GROUP_SIZE ? (INPUT0_FEATURE_NUM / GROUP_SIZE / GROUP_SIZE) : INPUT0_FEATURE_NUM)
-#endif
  
  // Note: In the non-ROI_OLD case we keep the coordinates in float instead
  //       of using UNIT_TYPE, since with FP16 we might actually lose some
@@ -52,12 +48,6 @@
  #error - unknown ROI_POOLING kernel type
  #endif
  
-/****************************************************************************
- *                                                                          *
- *                                RoI Pooling                               *
- *                                                                          *
- ***************************************************************************/
-
  KERNEL(roi_pooling_gpu)
  (
      const __global INPUT0_TYPE * src_data,
@@ -76,7 +66,9 @@ KERNEL(roi_pooling_gpu)
      //       with SPATIAL_SCALE: It makes sense since the resolution of
      //       the pooled data is limited by its dimensions. (Is this clear?)
  
-    const __global INPUT1_TYPE * roi_ptr = &src_rois[PITCH_ROI_R * r];
+    const __global INPUT1_TYPE* roi_ptr = &src_rois[PITCH_ROI_R * r];
+
+    const int src_batch_idx = (int)(roi_ptr[0]);
  
  #if BILINEAR_POOLING
      const uint output_offset = OUTPUT_OFFSET + x*OUTPUT_X_PITCH + y*OUTPUT_Y_PITCH + c*OUTPUT_FEATURE_PITCH + r*OUTPUT_ROI_PITCH;
@@ -86,13 +78,13 @@ KERNEL(roi_pooling_gpu)
      COORD_T roi_end_w   = roi_ptr[3];
      COORD_T roi_end_h   = roi_ptr[4];
  
-    COORD_T height_scale = (roi_end_h - roi_start_h) * (SRC_H - 1) / (COORD_T)(POOLED_HEIGHT - 1);
-    COORD_T width_scale  = (roi_end_w - roi_start_w) * (SRC_W - 1) / (COORD_T)(POOLED_WIDTH  - 1);
+    COORD_T height_scale = (roi_end_h - roi_start_h) * (SRC_H - 1.0f) / (COORD_T)(POOLED_HEIGHT - 1.0f);
+    COORD_T width_scale  = (roi_end_w - roi_start_w) * (SRC_W - 1.0f) / (COORD_T)(POOLED_WIDTH  - 1.0f);
  
-    COORD_T in_y = y*height_scale + roi_start_h*(COORD_T)(SRC_H - 1);
-    COORD_T in_x = x*width_scale  + roi_start_w*(COORD_T)(SRC_W - 1);
+    COORD_T in_y = y*height_scale + roi_start_h*(COORD_T)(SRC_H - 1.0f);
+    COORD_T in_x = x*width_scale  + roi_start_w*(COORD_T)(SRC_W - 1.0f);
  
-    if (in_y < 0 || in_y > (COORD_T)(SRC_H - 1) || in_x < 0 || in_x > (COORD_T)(SRC_W - 1) || roi_ptr[0] == -1) {
+    if (in_y < 0 || in_y > (COORD_T)(SRC_H - 1) || in_x < 0 || in_x > (COORD_T)(SRC_W - 1) || src_batch_idx == -1) {
          dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)0, NL_M, NL_N);
          return;
      }
@@ -102,7 +94,7 @@ KERNEL(roi_pooling_gpu)
      int left_x_index   = (int)(floor(in_x));
      int right_x_index  = (int)(min(ceil(in_x), (COORD_T)SRC_W - 1));
  
-    const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + INPUT0_FEATURE_PITCH*c;
+    const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*c;
  
      ACCUM_T top_left     = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + left_x_index*INPUT0_X_PITCH];
      ACCUM_T top_right    = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + right_x_index*INPUT0_X_PITCH];
@@ -117,7 +109,6 @@ KERNEL(roi_pooling_gpu)
      dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)res, NL_M, NL_N);
  #else
  
-#if USE_OLD_SCALE_AND_ROUNDING
      const int roi_x  = round(roi_ptr[1] * SPATIAL_SCALE);
      const int roi_y  = round(roi_ptr[2] * SPATIAL_SCALE);
      const int roi_x1 = round(roi_ptr[3] * SPATIAL_SCALE);
@@ -126,16 +117,6 @@ KERNEL(roi_pooling_gpu)
      // The final coordinate is within the ROI and malformed dimensions are treated as 1
      const uint roi_w = max(roi_x1 - roi_x, 0) + 1;
      const uint roi_h = max(roi_y1 - roi_y, 0) + 1;
-#else
-    const COORD_T roi_x  = (COORD_T)(round(roi_ptr[1]) + 0.f) * SPATIAL_SCALE;
-    const COORD_T roi_y  = (COORD_T)(round(roi_ptr[2]) + 0.f) * SPATIAL_SCALE;
-    const COORD_T roi_x1 = (COORD_T)(round(roi_ptr[3]) + 1.f) * SPATIAL_SCALE;
-    const COORD_T roi_y1 = (COORD_T)(round(roi_ptr[4]) + 1.f) * SPATIAL_SCALE;
-
-    // The final coordinate is within the ROI and malformed dimensions are treated as 1
-    const COORD_T roi_w = max(roi_x1 - roi_x, .1f);
-    const COORD_T roi_h = max(roi_y1 - roi_y, .1f);
-#endif
  
      // Note that the "after" value is rounded up, else we get the last cell,
      // instead of the cell beyond (For "symmetry").
@@ -145,7 +126,6 @@ KERNEL(roi_pooling_gpu)
      // [0, 1, 3, 4]                                     # as expected
      // >>> [((x + 1) * 6) // 4 for x in [0, 1, 2, 3]]   # "after" values
      // [1, 3, 4 ,6]                                     # [2, 3, 5, 6] expected!
-#if USE_OLD_SCALE_AND_ROUNDING
      const int dx_begin = ((x + 0) * roi_w) / DST_W;
      const int dy_begin = ((y + 0) * roi_h) / DST_H;
      const int dx_after = ((x + 1) * roi_w + (DST_W - 1)) / DST_W;
@@ -156,38 +136,8 @@ KERNEL(roi_pooling_gpu)
      const int y_begin = clamp(roi_y + dy_begin, 0, SRC_H);
      const int x_after = clamp(roi_x + dx_after, 0, SRC_W);
      const int y_after = clamp(roi_y + dy_after, 0, SRC_H);
-#else
-    const COORD_T dx_begin = (x + 0) * (COORD_T)(roi_w / DST_W);
-    const COORD_T dy_begin = (y + 0) * (COORD_T)(roi_h / DST_H);
-    const COORD_T dx_after = (x + 1) * (COORD_T)(roi_w / DST_W);
-    const COORD_T dy_after = (y + 1) * (COORD_T)(roi_h / DST_H);
-
-    // clamp in case roi_x or roi_y were unreasonable
-    const int x_begin = CLAMP(floor(roi_x + dx_begin), 0, SRC_W);
-    const int y_begin = CLAMP(floor(roi_y + dy_begin), 0, SRC_H);
-    const int x_after = CLAMP(ceil(roi_x + dx_after), 0, SRC_W);
-    const int y_after = CLAMP(ceil(roi_y + dy_after), 0, SRC_H);
-#endif
-
-#if GROUP_SIZE == 0
-    const uint work_c = c;
-#else
-
-#if 0
-    const COORD_T group_bin_w = (COORD_T)roi_w / DST_W;
-    const COORD_T group_bin_h = (COORD_T)roi_h / DST_H;
-
-    const uint group_x = CLAMP(x * group_bin_w, 0, GROUP_SIZE - 1);
-    const uint group_y = CLAMP(y * group_bin_h, 0, GROUP_SIZE - 1);
-#else
-    const uint group_x = x;
-    const uint group_y = y;
-#endif
-
-    const uint work_c = group_x + GROUP_SIZE * (group_y + GROUP_SIZE * c);
-#endif
  
-    const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + INPUT0_FEATURE_PITCH*work_c;
+    const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*c;
  
  #if MAX_POOLING
      ACCUM_T res = x_begin < x_after && y_begin < y_after ? -FLT_MAX : 0;
@@ -208,7 +158,6 @@ KERNEL(roi_pooling_gpu)
  
  #if (!MAX_POOLING)
      {
-        //TODO(ruv): again, differs from the standard fixed size area (?)
          const COORD_T area = (y_after - y_begin) * (x_after - x_begin);
          if (area) res /= area;
      }