Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / cl_kernels / convolution_gpu_yxfb_ref.cl
1 // Copyright (c) 2016-2017 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "include/include_all.cl"
16
17 KERNEL(convolution_gpu_yxfb_ref)(
18     const __global UNIT_TYPE* input,
19     __global UNIT_TYPE* output,
20     const __global UNIT_TYPE* filter,
21 #if BIAS_TERM
22     const __global UNIT_TYPE* bias,
23 #endif
24     uint split_idx)
25 {
26     UNIT_TYPE result = UNIT_VAL_ZERO;
27
28     const uint batch_offset = (uint)get_global_id(0) % INPUT0_BATCH_NUM;
29     const uint ofm_offset   = (uint)get_global_id(0) / INPUT0_BATCH_NUM;
30     const uint out_x        = (uint)get_global_id(1);
31     const uint out_y        = (uint)get_global_id(2);
32
33     const int x = (int)out_x * STRIDE_SIZE_X - PADDING_SIZE_X;
34     const int y = (int)out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
35     
36 #if DEPTHWISE_SEPARABLE_OPT
37     const uint in_split_offset = (ofm_offset / FILTER_OFM_NUM) * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
38 #else
39     const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
40 #endif
41     const uint input_offset = INPUT0_OFFSET + batch_offset*INPUT0_BATCH_PITCH + in_split_offset;
42 #if GROUPED && !DEPTHWISE_SEPARABLE_OPT
43     const uint filter_offset = split_idx * FILTER_LENGTH;
44 #else
45     const uint filter_offset = 0;
46 #endif
47
48     for (uint i = 0; i < FILTER_SIZE_Y; i++)
49     {
50         const int input_offset_y = y + i * DILATION_SIZE_Y;
51         const bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
52
53         if(!zero_y)
54         {
55             for (uint j = 0; j < FILTER_SIZE_X; j++)
56             {
57                 const int input_offset_x = x + j * DILATION_SIZE_X;
58                 const bool zero = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
59
60                 if(!zero)
61                 {
62                     uint input_idx = input_offset + (uint)input_offset_x*INPUT0_X_PITCH + (uint)input_offset_y*INPUT0_Y_PITCH;
63                     uint filter_idx = filter_offset + ofm_offset*FILTER_OFM_PITCH + i*FILTER_Y_PITCH + j*FILTER_X_PITCH;
64
65                     for (uint h = 0; h < FILTER_IFM_NUM; h++)
66                     {
67                         result = fma(input[input_idx], filter[filter_idx], result);
68                         filter_idx += FILTER_IFM_PITCH;
69                         input_idx += INPUT0_FEATURE_PITCH;
70                     }
71                 }
72             }
73         }
74     }
75 #if BIAS_TERM
76 #if GROUPED && !DEPTHWISE_SEPARABLE_OPT
77     const uint bias_offset = split_idx * BIAS_LENGTH;
78 #else
79     const uint bias_offset = 0;
80 #endif
81     result += bias[ofm_offset + bias_offset];
82 #endif
83     const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
84     const uint dst_index = batch_offset*OUTPUT_BATCH_PITCH + ofm_offset*OUTPUT_FEATURE_PITCH + out_y*OUTPUT_Y_PITCH + out_x*OUTPUT_X_PITCH + OUTPUT_OFFSET + out_split_offset;
85     output[dst_index] = ACTIVATION(result, NL_M, NL_N);
86 }