1 // Copyright (c) 2016-2019 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "include/common.cl"
16 #include "include/data_types.cl"
18 // Each RoI is described by 5 elements [batch_id xmin ymin xmax ymax]
19 #define ROI_NUM_ELEMENTS 5
24 #define MIN(a,b) ((a) < (b) ? (a) : (b))
25 #define MAX(a,b) ((a) > (b) ? (a) : (b))
26 #define CLAMP(v,l,u) MAX((l),MIN((v),(u)))
28 KERNEL(roi_pooling_ps_gpu)(const __global INPUT0_TYPE * src_data,
29 __global OUTPUT_TYPE * dst_data,
30 const __global INPUT1_TYPE * src_rois)
32 const size_t i = get_global_id(0);
34 const uint x = i % OUTPUT_SIZE_X;
35 const uint y = i / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
36 const uint c = i / OUTPUT_SIZE_X / OUTPUT_SIZE_Y % OUTPUT_FEATURE_NUM;
37 const uint r = i / OUTPUT_SIZE_X / OUTPUT_SIZE_Y / OUTPUT_FEATURE_NUM % OUTPUT_ROI_NUM;
39 const __global INPUT1_TYPE* roi_ptr = &src_rois[INPUT1_BATCH_PITCH * r];
40 const int src_batch_idx = (int)(roi_ptr[0]);
44 COORD_T roi_start_w = roi_ptr[1] * SPATIAL_SCALE;
45 COORD_T roi_start_h = roi_ptr[2] * SPATIAL_SCALE;
46 COORD_T roi_end_w = roi_ptr[3] * SPATIAL_SCALE;
47 COORD_T roi_end_h = roi_ptr[4] * SPATIAL_SCALE;
49 COORD_T roi_height = (roi_end_h - roi_start_h);
50 COORD_T roi_width = (roi_end_w - roi_start_w);
54 for (int bin_y = 0; bin_y < SPATIAL_BINS_Y; bin_y++)
56 for (int bin_x = 0; bin_x < SPATIAL_BINS_X; bin_x++)
58 COORD_T box_xmin = roi_start_w + (bin_x + 0) * (roi_width / SPATIAL_BINS_X);
59 COORD_T box_xmax = roi_start_w + (bin_x + 1) * (roi_width / SPATIAL_BINS_X);
60 COORD_T box_ymin = roi_start_h + (bin_y + 0) * (roi_height / SPATIAL_BINS_Y);
61 COORD_T box_ymax = roi_start_h + (bin_y + 1) * (roi_height / SPATIAL_BINS_Y);
63 const uint gc = c + (bin_y*SPATIAL_BINS_X + bin_x)*OUTPUT_FEATURE_NUM;
64 const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*gc;
65 COORD_T height_scale = POOLED_HEIGHT > 1 ? (box_ymax - box_ymin) * (INPUT0_SIZE_Y - 1) / (POOLED_HEIGHT - 1)
67 COORD_T width_scale = POOLED_WIDTH > 1 ? (box_xmax - box_xmin) * (INPUT0_SIZE_X - 1) / (POOLED_WIDTH - 1)
70 float in_y = POOLED_HEIGHT > 1 ? (y * height_scale + box_ymin * (INPUT0_SIZE_Y - 1))
71 : 0.5f * (box_ymin + box_ymax) * (INPUT0_SIZE_Y - 1);
72 float in_x = POOLED_WIDTH > 1 ? (x * width_scale + box_xmin * (INPUT0_SIZE_X - 1))
73 : 0.5f * (box_xmin + box_xmax) * (INPUT0_SIZE_X - 1);
75 if (!(in_y < 0 || in_y > (COORD_T)(INPUT0_SIZE_Y - 1) ||
76 in_x < 0 || in_x > (COORD_T)(INPUT0_SIZE_X - 1) || src_batch_idx == -1))
78 int top_y_index = (int)(floor(in_y));
79 int bottom_y_index = (int)(min(ceil(in_y), (COORD_T)INPUT0_SIZE_Y - 1));
80 int left_x_index = (int)(floor(in_x));
81 int right_x_index = (int)(min(ceil(in_x), (COORD_T)INPUT0_SIZE_X - 1));
83 ACCUM_T top_left = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + left_x_index*INPUT0_X_PITCH];
84 ACCUM_T top_right = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + right_x_index*INPUT0_X_PITCH];
85 ACCUM_T bottom_left = (ACCUM_T)data[bottom_y_index*INPUT0_Y_PITCH + left_x_index*INPUT0_X_PITCH];
86 ACCUM_T bottom_right = (ACCUM_T)data[bottom_y_index*INPUT0_Y_PITCH + right_x_index*INPUT0_X_PITCH];
88 ACCUM_T top = top_left + (top_right - top_left) * (in_x - left_x_index);
89 ACCUM_T bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index);
91 res += top + (bottom - top) * (in_y - top_y_index);
96 res /= (SPATIAL_BINS_Y*SPATIAL_BINS_X);
98 const uint work_c = x + POOLED_WIDTH * (y + POOLED_HEIGHT * c);
99 const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*work_c;
101 const COORD_T roi_x = (COORD_T)(round(roi_ptr[1]) + 0.f) * SPATIAL_SCALE;
102 const COORD_T roi_y = (COORD_T)(round(roi_ptr[2]) + 0.f) * SPATIAL_SCALE;
103 const COORD_T roi_x1 = (COORD_T)(round(roi_ptr[3]) + 1.f) * SPATIAL_SCALE;
104 const COORD_T roi_y1 = (COORD_T)(round(roi_ptr[4]) + 1.f) * SPATIAL_SCALE;
106 // The final coordinate is within the ROI and malformed dimensions are treated as 1
107 const COORD_T roi_w = max(roi_x1 - roi_x, .1f);
108 const COORD_T roi_h = max(roi_y1 - roi_y, .1f);
110 const COORD_T dx_begin = (x + 0) * (COORD_T)(roi_w / POOLED_WIDTH);
111 const COORD_T dy_begin = (y + 0) * (COORD_T)(roi_h / POOLED_HEIGHT);
112 const COORD_T dx_after = (x + 1) * (COORD_T)(roi_w / POOLED_WIDTH);
113 const COORD_T dy_after = (y + 1) * (COORD_T)(roi_h / POOLED_HEIGHT);
115 // clamp in case roi_x or roi_y were unreasonable
116 const int x_begin = CLAMP(floor(roi_x + dx_begin), 0, INPUT0_SIZE_X);
117 const int y_begin = CLAMP(floor(roi_y + dy_begin), 0, INPUT0_SIZE_Y);
118 const int x_after = CLAMP(ceil(roi_x + dx_after), 0, INPUT0_SIZE_X);
119 const int y_after = CLAMP(ceil(roi_y + dy_after), 0, INPUT0_SIZE_Y);
123 for (int yy = y_begin; yy < y_after; ++yy)
125 for (int xx = x_begin; xx < x_after; ++xx)
127 INPUT0_TYPE val = data[xx*INPUT0_X_PITCH + yy*INPUT0_Y_PITCH];
132 const COORD_T area = (y_after - y_begin) * (x_after - x_begin);
137 #error "Unsupported pooling mode"
139 const uint output_offset = OUTPUT_OFFSET + x*OUTPUT_X_PITCH + y*OUTPUT_Y_PITCH + c*OUTPUT_FEATURE_PITCH + r*OUTPUT_ROI_PITCH;
140 dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)(res), NL_M, NL_N);