Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / cl_kernels / fully_connected_gpu_yxfb_ref.cl
1 // Copyright (c) 2016-2017 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15
16 #include "include/include_all.cl"
17 #include "include/reshape_dims.cl"
18
19 // Required JIT constants:
20 //  - FP16_SUPPORTED       - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
21 //  - FP16_UNIT_USED       - [0/1] Value indicating that current kernel should use FP16.
22 //  - UNIT_TYPE            - Type of unit of input/output/weights/biases.
23 //  - UNIT_VAL_ZERO        - Literal of current UNIT_TYPE that represents 0.
24 //  - INPUT0_BATCH_NUM     - [int] Number of batch elements in the input tensor.
25 //  - INPUT0_FEATURE_NUM / INPUT0_SIZE_Y / INPUT0_SIZE_X - [int] Input feature count and spatial extents iterated per output value.
26 //  - FILTER_OFM_NUM       - [int] Number of output feature maps (neurons) produced per batch element.
27 //  - ACTIVATION           - Activation function macro applied to the accumulated output.
28 //  - NL_M, NL_N           - Activation parameters (e.g. negative slope) consumed by ACTIVATION.
29
KERNEL (fully_connected_gpu_yxfn)(
    const __global INPUT0_TYPE* input,
    __global OUTPUT_TYPE* output,
    const __global FILTER_TYPE* weights
#if BIAS_TERM
    , const __global BIAS_TYPE* biases
#endif
    )
{
    // Reference FC kernel: one work-item produces one (batch, output-neuron)
    // value by a full dot product over the input volume.
    const uint gid    = get_global_id(0);
    const uint b      = gid % INPUT0_BATCH_NUM;   // batch element
    const uint neuron = gid / INPUT0_BATCH_NUM;   // output feature (OFM)

    const uint filter_base = neuron * FILTER_OFM_PITCH;

    UNIT_TYPE acc = UNIT_VAL_ZERO;

    // Walk the whole input volume (feature, y, x) of this batch element.
    for (uint f = 0; f < INPUT0_FEATURE_NUM; ++f)
    {
        for (uint y = 0; y < INPUT0_SIZE_Y; ++y)
        {
            for (uint x = 0; x < INPUT0_SIZE_X; ++x)
            {
                // Map the input coordinate (b, f, y, x) into the weight
                // tensor's dimension order; wpos[1..3] index IFM/Y/X.
                const uint4 wpos = FUNC(reshape_dims)(b, f, y, x,
                                                      INPUT0_SIZE_Y, INPUT0_SIZE_X,
                                                      FILTER_SIZE_Y, FILTER_SIZE_X,
                                                      INPUT0_DIMS, FILTER_DIMS);
                const uint w_idx  = filter_base
                                  + wpos[1] * FILTER_IFM_PITCH
                                  + wpos[2] * FILTER_Y_PITCH
                                  + wpos[3] * FILTER_X_PITCH;
                const uint in_idx = INPUT0_OFFSET
                                  + b * INPUT0_BATCH_PITCH
                                  + f * INPUT0_FEATURE_PITCH
                                  + y * INPUT0_Y_PITCH
                                  + x * INPUT0_X_PITCH;
                acc += input[in_idx] * weights[w_idx];
            }
        }
    }

#if BIAS_TERM
    acc += biases[neuron];
#endif

    const uint out_idx = OUTPUT_OFFSET
                       + b * OUTPUT_BATCH_PITCH
                       + neuron * OUTPUT_FEATURE_PITCH;
    output[out_idx] = ACTIVATION(acc, NL_M, NL_N);
}