// Copyright (c) 2016-2017 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "include/include_all.cl" #if MAX_POOLING #define INIT_VAL CHAR_MIN #elif AVG_POOLING #define INIT_VAL 0 #else #error #endif inline int FUNC(apply_pooling)(int tmp, int in) { #if MAX_POOLING return max(tmp, in); #elif AVG_POOLING return tmp + in; #endif } KERNEL(pooling_gpu_int8_ref)( const __global UNIT_TYPE* input, __global UNIT_TYPE* output) { #if OUTPUT_LAYOUT_BFYX || OUTPUT_LAYOUT_BYXF const uint x = (uint)get_global_id(0); const uint y = (uint)get_global_id(1); const uint bf = (uint)get_global_id(2); const uint f = bf % INPUT0_FEATURE_NUM; const uint b = bf / INPUT0_FEATURE_NUM; if (x >= OUTPUT_SIZE_X) { return; } #elif OUTPUT_LAYOUT_YXFB const uint x = (uint)get_global_id(1); const uint y = (uint)get_global_id(2); const uint bf = (uint)get_global_id(0); const uint f = bf / INPUT0_BATCH_NUM; const uint b = bf % INPUT0_BATCH_NUM; #endif const int offset_x = (int)x*STRIDE_SIZE_X - PADDING_SIZE_X; const int offset_y = (int)y*STRIDE_SIZE_Y - PADDING_SIZE_Y; int result = INIT_VAL; #ifdef CHECK_BOUNDRY if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X || offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y) { return; } #ifdef DYNAMIC_KERNEL_DIVIDER uint num_elementes = 0; #endif const uint batch_and_feature_offset = GET_DATA_INDEX(INPUT0, b, f, 0, 0); for(uint j = 0; j < POOL_SIZE_Y; j++) { int input_offset_y = offset_y + j; bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0; if(!zero_y) { for(uint i = 0; i < POOL_SIZE_X; i++) { int input_offset_x = offset_x + i; bool zero = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0; if(!zero) { const uint input_idx = batch_and_feature_offset + input_offset_y*INPUT0_Y_PITCH + input_offset_x*INPUT0_X_PITCH; result = FUNC_CALL(apply_pooling)(result, (int)input[input_idx]); #ifdef DYNAMIC_KERNEL_DIVIDER num_elementes++; #endif } } } } #ifdef DYNAMIC_WITH_PADDING_KERNEL_DIVIDER const int hend = min(offset_y + POOL_SIZE_Y, INPUT0_SIZE_Y + PADDING_SIZE_Y); const int wend = min(offset_x + POOL_SIZE_X, INPUT0_SIZE_X + PADDING_SIZE_X); const uint num_elementes = (hend - offset_y) * (wend - offset_x); #endif #else uint input_idx = GET_DATA_INDEX(INPUT0, b, f, offset_y, offset_x); for(uint j = 0; j < POOL_SIZE_Y; j++) { for(uint i = 0; i < POOL_SIZE_X; i++) { result = FUNC_CALL(apply_pooling)(result, (int)input[input_idx]); input_idx += INPUT0_X_PITCH; } input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH); } #if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER) const uint num_elementes = POOL_SIZE_X*POOL_SIZE_Y; #endif #endif #if defined AVG_POOLING #if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER) result = convert_int(round(((float)result / max(num_elementes, (uint)1))); #else result = convert_int(round((float)result / (int)(POOL_SIZE_Y * POOL_SIZE_X))); #endif #endif const uint output_pos = GET_DATA_INDEX(OUTPUT, b, f, y, x); output[output_pos] = ACTIVATION(convert_char(result), NL_M ,NL_N); } #undef INIT_VAL