1 // Copyright (c) 2016-2017 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
16 #include "include/include_all.cl"
// Seed value for the pooling accumulator: the kernel initialises `result`
// with INIT_VAL before folding in the window's input values.
// NOTE(review): CHAR_MIN looks like the seed for a max-style reduction over
// int8 data; any conditional selecting this definition is not visible in this
// view -- confirm.
19 #define INIT_VAL CHAR_MIN
// Single pooling step: takes the running accumulator `tmp` and one input
// sample `in` (both widened to int) and returns the updated accumulator.
// The kernel calls it as result = apply_pooling(result, (int)input[idx]).
// NOTE(review): the function body is not visible in this view, so the actual
// reduction it performs cannot be stated here.
26 inline int FUNC(apply_pooling)(int tmp, int in)
// Reference int8 pooling kernel. Each work-item derives one output coordinate
// (b, f, y, x) from its global ids, folds the input values of its pooling
// window into an int accumulator through apply_pooling, optionally converts
// the accumulated value into a rounded average (AVG_POOLING), and stores one
// element to `output`.
//
// input  - read-only __global buffer of UNIT_TYPE elements.
// output - __global buffer of UNIT_TYPE elements; written at the index
//          GET_DATA_INDEX(OUTPUT, b, f, y, x).
//
// NOTE(review): this view of the file omits lines (braces, guarded statements
// and several preprocessor directives are not visible), so the exact nesting
// of the conditional sections below cannot be established from here.
35 KERNEL(pooling_gpu_int8_ref)(
36 const __global UNIT_TYPE* input,
37 __global UNIT_TYPE* output)
// BFYX / BYXF output layouts: global dim 0 is x, dim 1 is y, dim 2 is a fused
// batch/feature index in which the feature varies fastest.
39 #if OUTPUT_LAYOUT_BFYX || OUTPUT_LAYOUT_BYXF
40 const uint x = (uint)get_global_id(0);
41 const uint y = (uint)get_global_id(1);
42 const uint bf = (uint)get_global_id(2);
43 const uint f = bf % INPUT0_FEATURE_NUM;
44 const uint b = bf / INPUT0_FEATURE_NUM;
// Guard for work-items whose x is beyond the output width. The guarded
// statement is not visible in this view (presumably an early return -- confirm).
46 if (x >= OUTPUT_SIZE_X)
// YXFB output layout: global dim 0 is the fused batch/feature index in which
// the batch varies fastest, dim 1 is x, dim 2 is y.
50 #elif OUTPUT_LAYOUT_YXFB
51 const uint x = (uint)get_global_id(1);
52 const uint y = (uint)get_global_id(2);
53 const uint bf = (uint)get_global_id(0);
54 const uint f = bf / INPUT0_BATCH_NUM;
55 const uint b = bf % INPUT0_BATCH_NUM;
// Top-left corner of the pooling window in input coordinates. Signed because
// subtracting the padding can make it negative.
58 const int offset_x = (int)x*STRIDE_SIZE_X - PADDING_SIZE_X;
59 const int offset_y = (int)y*STRIDE_SIZE_Y - PADDING_SIZE_Y;
// Accumulator for the window, seeded with INIT_VAL.
61 int result = INIT_VAL;
// Tests whether the window lies outside the input extent in x or y. The
// statement guarded by this condition is not visible in this view.
// NOTE(review): the low-side tests use `offset + POOL_SIZE < 0`. A window with
// offset + POOL_SIZE == 0 ends at index -1 and also covers no valid input
// element, yet is not caught here -- confirm whether `<= 0` was intended.
64 if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
65 offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y)
// With DYNAMIC_KERNEL_DIVIDER a mutable element counter is declared; it is
// later used as the averaging divisor.
70 #ifdef DYNAMIC_KERNEL_DIVIDER
71 uint num_elementes = 0;
// Input index of element (b, f, 0, 0); base for the per-element offsets below.
74 const uint batch_and_feature_offset = GET_DATA_INDEX(INPUT0, b, f, 0, 0);
// Bounds-aware traversal of the window. zero_y / zero flag rows / columns that
// fall outside the input. The statements consuming these flags are not visible
// in this view (presumably they skip out-of-range elements -- confirm).
75 for(uint j = 0; j < POOL_SIZE_Y; j++)
77 int input_offset_y = offset_y + j;
78 bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
81 for(uint i = 0; i < POOL_SIZE_X; i++)
83 int input_offset_x = offset_x + i;
84 bool zero = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
// Linear input index: base plus row and column offsets scaled by the pitches.
87 const uint input_idx = batch_and_feature_offset + input_offset_y*INPUT0_Y_PITCH + input_offset_x*INPUT0_X_PITCH;
// Fold the sample (widened to int) into the accumulator.
89 result = FUNC_CALL(apply_pooling)(result, (int)input[input_idx]);
// DYNAMIC_KERNEL_DIVIDER section; its body is not visible in this view
// (presumably it counts the element just accumulated -- confirm).
91 #ifdef DYNAMIC_KERNEL_DIVIDER
// DYNAMIC_WITH_PADDING_KERNEL_DIVIDER: the divisor is the window area clipped
// to the input extent enlarged by PADDING_SIZE on the high side, so positions
// inside that padding still count towards the divisor.
98 #ifdef DYNAMIC_WITH_PADDING_KERNEL_DIVIDER
99 const int hend = min(offset_y + POOL_SIZE_Y, INPUT0_SIZE_Y + PADDING_SIZE_Y);
100 const int wend = min(offset_x + POOL_SIZE_X, INPUT0_SIZE_X + PADDING_SIZE_X);
101 const uint num_elementes = (hend - offset_y) * (wend - offset_x);
// Second traversal variant without per-element bounds tests: start at the
// window's top-left element and walk it using the pitches.
// NOTE(review): the directive that selects between this variant and the
// bounds-aware traversal above is not visible in this view.
104 uint input_idx = GET_DATA_INDEX(INPUT0, b, f, offset_y, offset_x);
106 for(uint j = 0; j < POOL_SIZE_Y; j++)
108 for(uint i = 0; i < POOL_SIZE_X; i++)
110 result = FUNC_CALL(apply_pooling)(result, (int)input[input_idx]);
// Step to the next column of the window.
111 input_idx += INPUT0_X_PITCH;
// Undo the POOL_SIZE_X column steps of the inner loop and move down one row.
113 input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH);
// Divisor equal to the full window area when a dynamic divider is requested
// (presumably paired with the unchecked traversal above, where every window
// position is accumulated -- confirm against the full file).
116 #if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER)
117 const uint num_elementes = POOL_SIZE_X*POOL_SIZE_Y;
// Average pooling: turn the accumulated sum into a mean rounded with round().
// With a dynamic divider the divisor is num_elementes, clamped to at least 1
// so the division never uses zero; otherwise it is the constant window area.
121 #if defined AVG_POOLING
122 #if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER)
// NOTE(review): the parentheses on the next statement are unbalanced (six
// opening, five closing). As written, this path cannot compile when
// AVG_POOLING is combined with either dynamic divider; one ')' is missing
// before the ';'.
123 result = convert_int(round(((float)result / max(num_elementes, (uint)1)));
125 result = convert_int(round((float)result / (int)(POOL_SIZE_Y * POOL_SIZE_X)));
// Store: the int result is narrowed with convert_char, passed through the
// ACTIVATION macro with NL_M / NL_N, and written to the output element for
// (b, f, y, x).
129 const uint output_pos = GET_DATA_INDEX(OUTPUT, b, f, y, x);
130 output[output_pos] = ACTIVATION(convert_char(result), NL_M ,NL_N);