1 // Copyright (c) 2018 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "include/include_all.cl"
// x index into image_meta from which the kernel reads img_dim_X.
17 #define META_OFFSET_X 4
// x index into image_meta from which the kernel reads img_dim_Y.
18 #define META_OFFSET_Y 5
// Number of entries in the parameters table and in the kernel feature-map pointer array.
20 #define SIZE_TAB_PARAMETERS 4
// Fields of struct Parameters (the struct header and braces are not visible in this listing).
// Going by the initializers of the parameters table, the fields hold, in order, for one
// feature-map input: SIZE_Y (h_source), SIZE_X (w_source), FEATURE_PITCH (f_Size),
// X_PITCH (x_Size), Y_PITCH (y_Size) and OFFSET (offset).
24 int h_source, w_source, f_Size, x_Size, y_Size, offset;
// Layout table with one row per feature-map input, INPUT2 through INPUT5 in that order.
// The kernel indexes it with roi_level, so row 0 belongs to P2 and row 3 to P5.
// Column order matches the struct fields: h_source, w_source, f_Size, x_Size, y_Size, offset.
27 __constant struct Parameters parameters [SIZE_TAB_PARAMETERS] =
29 { INPUT2_SIZE_Y, INPUT2_SIZE_X, INPUT2_FEATURE_PITCH, INPUT2_X_PITCH, INPUT2_Y_PITCH, INPUT2_OFFSET },
30 { INPUT3_SIZE_Y, INPUT3_SIZE_X, INPUT3_FEATURE_PITCH, INPUT3_X_PITCH, INPUT3_Y_PITCH, INPUT3_OFFSET },
31 { INPUT4_SIZE_Y, INPUT4_SIZE_X, INPUT4_FEATURE_PITCH, INPUT4_X_PITCH, INPUT4_Y_PITCH, INPUT4_OFFSET },
32 { INPUT5_SIZE_Y, INPUT5_SIZE_X, INPUT5_FEATURE_PITCH, INPUT5_X_PITCH, INPUT5_Y_PITCH, INPUT5_OFFSET }
// pyramidROIAlign_gpu_ref: reference kernel. Each work item handles the box whose index is
// get_global_id(0): it selects one of the four feature maps P2..P5 and writes, for every
// feature, an OUTPUT_SIZE_Y x OUTPUT_SIZE_X grid of bilinearly interpolated samples to output.
//
// Arguments:
//   boxes      - box data; for box kerNum the x entries 0, 1, 2, 3 are read as hL, wL, hU, wU.
//   image_meta - metadata; x entries META_OFFSET_X and META_OFFSET_Y are read as image dimensions.
//   P2..P5     - candidate feature maps, stored in feature_map_Ptr slots 0..3.
//   dim        - not referenced in the visible part of the body.
//   output     - destination buffer; the box index is used as its batch coordinate.
//
// NOTE(review): this listing appears to have lost brace-only and some short lines (for example
// the declarations of x, y and f_Size are not visible), so the comments below describe only
// the statements that are shown.
36 KERNEL(pyramidROIAlign_gpu_ref)(
37 const __global INPUT0_TYPE *boxes,
38 const __global INPUT1_TYPE *image_meta,
39 const __global INPUT2_TYPE *P2,
40 const __global INPUT3_TYPE *P3,
41 const __global INPUT4_TYPE *P4,
42 const __global INPUT5_TYPE *P5,
43 const __global INPUT6_TYPE *dim,
44 __global OUTPUT_TYPE *output)
// Index of the box processed by this work item.
47 const uint kerNum = (uint) get_global_id(0);
// NOTE(review): the element type here is float while P2..P5 are declared with
// INPUT2_TYPE..INPUT5_TYPE - confirm those types are always float.
49 const __global float *feature_map_Ptr[SIZE_TAB_PARAMETERS];
// Image dimensions are taken from fixed positions of the metadata buffer.
52 INPUT1_TYPE img_dim_X = image_meta[GET_DATA_INDEX(INPUT1, 0, 0, 0, META_OFFSET_X)];
53 INPUT1_TYPE img_dim_Y = image_meta[GET_DATA_INDEX(INPUT1, 0, 0, 0, META_OFFSET_Y)];
55 INPUT1_TYPE image_area = img_dim_X * img_dim_Y;
// scale = sqrt(image area) / 224.
// NOTE(review): 224.0 has no f suffix, so it is a double-precision literal in OpenCL C -
// confirm this is intended on devices without fp64 support.
56 INPUT1_TYPE scale = sqrt(image_area) / 224.0;
// Box extents: h = hU - hL and w = wU - wL.
58 INPUT0_TYPE hU = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 2)];
59 INPUT0_TYPE hL = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 0)];
60 INPUT0_TYPE h = hU - hL;
61 INPUT0_TYPE wU = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 3)];
62 INPUT0_TYPE wL = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 1)];
63 INPUT0_TYPE w = wU - wL;
// Raw level = round(log2(sqrt(h * w) * scale)).
// NOTE(review): if h * w is 0 the log2 argument is 0 and the cast to int is applied to a
// non-finite value - confirm degenerate boxes cannot reach this kernel.
65 int roi_level = (int)round(log2(sqrt(h*w) * scale));
// Shift the raw level by 2 and clamp it so it is a valid index into parameters and feature_map_Ptr.
67 // 0 <= roi_level <= 3
68 roi_level = min(3, max(0, 2 + roi_level));
70 feature_map_Ptr[0] = P2;
71 feature_map_Ptr[1] = P3;
72 feature_map_Ptr[2] = P4;
73 feature_map_Ptr[3] = P5;
// Feature pitch of the selected map (the declaration of f_Size is not visible in this listing).
75 f_Size = parameters[roi_level].f_Size;
77 // calculate coefficients for transformation
// Box corners scaled by (source size - 1) of the selected map.
// Presumably the box coordinates are normalized to [0, 1] - TODO confirm.
78 INPUT0_TYPE y1 = hL * (parameters[roi_level].h_source - 1);
79 INPUT0_TYPE x1 = wL * (parameters[roi_level].w_source - 1);
80 INPUT0_TYPE y2 = hU * (parameters[roi_level].h_source - 1);
81 INPUT0_TYPE x2 = wU * (parameters[roi_level].w_source - 1);
// Step between neighbouring output samples along x and y.
// NOTE(review): the divisor is 0 when OUTPUT_SIZE_X or OUTPUT_SIZE_Y equals 1.
82 INPUT0_TYPE deltaX = (x2 - x1) / (OUTPUT_SIZE_X - 1);
83 INPUT0_TYPE deltaY = (y2 - y1) / (OUTPUT_SIZE_Y - 1);
87 for (int i = 0; i < OUTPUT_SIZE_Y; ++i) //loop by 'y' coordinate
// ya and yb are the integer rows below and above the sample position y
// (the declaration and update of y are not visible in this listing).
89 int ya = (int)floor(y);
90 int yb = (int)ceil(y);
// Keep yb inside the source map.
93 if (yb >= parameters[roi_level].h_source) yb = parameters[roi_level].h_source - 1;
// NOTE(review): any enclosing condition of the next statement is not visible here.
96 if (yb + 2 < parameters[roi_level].h_source) ++yb;
102 for (int j = 0; j < OUTPUT_SIZE_X; ++j) //loop by 'x' coordinate
// xa and xb are the integer columns on either side of x, handled the same way as ya and yb.
104 int xa = (int)floor(x);
105 int xb = (int)ceil(x);
107 if (xb >= parameters[roi_level].w_source) xb = parameters[roi_level].w_source - 1;
110 if (xb + 2 < parameters[roi_level].w_source) ++xb;
// The statements after the diagram compute a = yb - y, b = y - ya, c = xb - x, d = x - xa;
// these distances are used as interpolation weights in the feature loop.
114 /* BILINEAR TRANSFORMATION
115 (xa,yb,f3)*---------------------------------*(xb,yb,f2)
119 (xa,ya,f0)*---------------------------------*(xb,ya,f1)
121 //coefficients for bilinear transformation
122 INPUT0_TYPE a = yb - y;
123 INPUT0_TYPE b = y - ya;
124 INPUT0_TYPE c = xb - x;
125 INPUT0_TYPE d = x - xa;
127 /*#define GET_DATA_INDEX(prefix, b, f, y, x) \
128 CAT(prefix, _OFFSET) + \
129 (x)*CAT(prefix, _X_PITCH) + \
130 (y)*CAT(prefix, _Y_PITCH) + \
131 (f)*CAT(prefix, _FEATURE_PITCH) + \
132 (b)*CAT(prefix, _BATCH_PITCH)
134 For P2, P3, P4, P5 batch size is always 0 */
// Element indices of the four corner samples in the selected map, without the feature term:
// f0 = (xa, ya), f1 = (xb, ya), f2 = (xb, yb), f3 = (xa, yb).
136 size_t f0Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * ya + parameters[roi_level].x_Size * xa;
137 size_t f1Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * ya + parameters[roi_level].x_Size * xb;
138 size_t f2Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * yb + parameters[roi_level].x_Size * xb;
139 size_t f3Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * yb + parameters[roi_level].x_Size * xa;
// Output index for batch kerNum, row i, column j; the feature term is added per feature below.
140 size_t ind_out = OUTPUT_OFFSET + i * OUTPUT_Y_PITCH + j * OUTPUT_X_PITCH + kerNum * OUTPUT_BATCH_PITCH;
142 for (int k = 0; k < OUTPUT_FEATURE_NUM; ++k) //transformation for every feature
// Read the four corner values of feature k; k * f_Size is the feature offset in the source map.
144 INPUT0_TYPE f0 = feature_map_Ptr[roi_level][k * f_Size + f0Ind];
145 INPUT0_TYPE f1 = feature_map_Ptr[roi_level][k * f_Size + f1Ind];
146 INPUT0_TYPE f2 = feature_map_Ptr[roi_level][k * f_Size + f2Ind];
147 INPUT0_TYPE f3 = feature_map_Ptr[roi_level][k * f_Size + f3Ind];
// Blend along y on column xa (f03) and on column xb (f12), then blend the two along x.
149 INPUT0_TYPE f03 = f3 * b + f0 * a;
150 INPUT0_TYPE f12 = f2 * b + f1 * a;
151 INPUT0_TYPE f = f03 * c + f12 * d;
// Store the interpolated value for feature k of this output position.
153 output[k * OUTPUT_FEATURE_PITCH + ind_out] = f;