1 // Copyright (c) 2018 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
16 #include "include/include_all.cl"
// Kernel pooling_gpu: each work-item reads one element of `input`, looks up a
// target index for it in `arg_max`, and accumulates the activated input value
// into `output` at that index.
//
//   input   - read-only source values (UNIT_TYPE).
//   output  - destination buffer, updated with `+=`.
//             NOTE(review): because the update accumulates, this presumably
//             expects `output` to be zero-filled before launch - confirm
//             against the host code.
//   arg_max - read-only float buffer; each element is converted to uint and
//             used as an index into `output`.
//
// NOTE(review): braces, the body of the bounds guard and several preprocessor
// directives are not visible in this view of the file; comments below only
// describe the statements that are shown.
18 KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output, const __global float* arg_max)
// BFYX / BYXF layouts: global id 0 -> x, 1 -> y, 2 -> combined batch*feature
// index (feature varies fastest: f = bf % FEATURE_NUM, b = bf / FEATURE_NUM).
20 #if OUTPUT_LAYOUT_BFYX || OUTPUT_LAYOUT_BYXF
21 const uint x = (uint)get_global_id(0);
22 const uint y = (uint)get_global_id(1);
23 const uint bf = (uint)get_global_id(2);
24 const uint f = bf % INPUT0_FEATURE_NUM;
25 const uint b = bf / INPUT0_FEATURE_NUM;
// Guard for work-items whose x lies beyond the input width.
// NOTE(review): the guarded statement is not visible here - presumably an
// early return; confirm in the full file.
27 if (x >= INPUT0_SIZE_X)
// YXFB layout: global id 0 -> combined batch*feature index, 1 -> x, 2 -> y
// (batch varies fastest: b = bf % BATCH_NUM, f = bf / BATCH_NUM).
31 #elif OUTPUT_LAYOUT_YXFB
32 const uint x = (uint)get_global_id(1);
33 const uint y = (uint)get_global_id(2);
34 const uint bf = (uint)get_global_id(0);
35 const uint f = bf / INPUT0_BATCH_NUM;
36 const uint b = bf % INPUT0_BATCH_NUM;
// Offsets of element (b, f, y, x) in the value buffer (INPUT0) and in the
// index buffer (INPUT1); GET_DATA_INDEX is defined outside this file view.
39 const uint input_id = GET_DATA_INDEX(INPUT0, b, f, y, x);
40 const uint arg_max_id = GET_DATA_INDEX(INPUT1, b, f, y, x);
// The index is stored as float in arg_max; convert it to an unsigned integer.
41 const uint pool_idx = convert_uint(arg_max[arg_max_id]);
// Decompose pool_idx as a linear index over the OUTPUT dimensions with x
// varying fastest, then y, then feature, then batch.
44 const uint x_output = pool_idx % OUTPUT_SIZE_X;
45 const uint y_output = (pool_idx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
46 const uint f_output = (pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM;
47 const uint b_output = pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y / OUTPUT_FEATURE_NUM;
// Re-map the decomposed coordinates through GET_DATA_INDEX(OUTPUT, ...) and
// accumulate the activated input value at the resulting position.
// NOTE(review): `+=` here is a plain (non-atomic) read-modify-write on
// __global memory; if two work-items can resolve to the same position the
// updates would race - confirm the producer of arg_max rules that out.
49 const uint output_pos = GET_DATA_INDEX(OUTPUT, b_output, f_output, y_output, x_output);
50 output[output_pos] += ACTIVATION(input[input_id], NL_M ,NL_N);
// Alternative accumulation that uses pool_idx directly as the output offset.
// NOTE(review): this and the accumulation above are presumably selected by a
// preprocessor conditional that is not visible in this view (otherwise the
// value would be added twice) - confirm in the full file.
52 output[pool_idx] += ACTIVATION(input[input_id], NL_M ,NL_N);