1 // Copyright (c) 2016-2017 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
16 #include "include/include_all.cl"
17 #include "include/reshape_dims.cl"
19 // Required JIT constants:
20 // - FP16_SUPPORTED - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
21 // - FP16_UNIT_USED - [0/1] Value indicating that current kernel should use FP16.
22 // - UNIT_TYPE - Type of unit of input/output/weights/biases.
23 // - UNIT_VAL_ZERO - Literal of current UNIT_TYPE that represents 0.
// - INPUT0_BATCH_NUM - [int] Number of elements from single spatial and single feature that are grouped in single batch in input.
25 // - INPUT_ELEMENTS_COUNT - [int] Cumulative number of elements from input that are processed in single batch.
26 // - FILTER_OFM_NUM - [int] Cumulative number of elements that are outputted in single batch.
27 // - RELU - [0/1] Indicates that ReLU activation function should be used on output.
28 // - NEGATIVE_SLOPE - [float] Factor for negative output values (required when ReLU is specified).
// Fully-connected (inner-product) layer, yxfn layout.
// One work-item computes a single (batch, output-neuron) pair:
//   global id x  ->  batch_id = x % INPUT0_BATCH_NUM, neuronIdx = x / INPUT0_BATCH_NUM
// The result is the dot product of the whole input tensor slice for batch_id
// with the weight slice of output feature neuronIdx, plus an optional bias,
// passed through the JIT-selected activation.
KERNEL (fully_connected_gpu_yxfn)(
    const __global INPUT0_TYPE* input,
    __global OUTPUT_TYPE* output,
    const __global FILTER_TYPE* weights
#if BIAS_TERM
    , const __global BIAS_TYPE* biases
#endif
    )
{
    const uint x = get_global_id(0);
    const uint batch_id = x % INPUT0_BATCH_NUM;   // batch element handled by this work-item
    const uint neuronIdx = x / INPUT0_BATCH_NUM;  // output feature (neuron) handled by this work-item

    UNIT_TYPE result = UNIT_VAL_ZERO;

    // Base offset of this neuron's weight slice (one OFM worth of weights).
    uint weight_offset = neuronIdx * FILTER_OFM_PITCH;

    // Accumulate the dot product over every input feature and spatial position.
    for (uint k = 0; k < INPUT0_FEATURE_NUM; k++)
    {
        for (uint j = 0; j < INPUT0_SIZE_Y; j++)
        {
            for (uint i = 0; i < INPUT0_SIZE_X; i++)
            {
                // reshape_dims remaps the input (b,f,y,x) coordinate into the
                // filter's dimension order; widx[1..3] are presumably the
                // (ifm, y, x) indices in filter space — see include/reshape_dims.cl.
                uint4 widx = FUNC(reshape_dims)(batch_id, k,j,i, INPUT0_SIZE_Y, INPUT0_SIZE_X, FILTER_SIZE_Y, FILTER_SIZE_X, INPUT0_DIMS, FILTER_DIMS);
                uint weight_idx = weight_offset + widx[1]*FILTER_IFM_PITCH + widx[2]*FILTER_Y_PITCH + widx[3]*FILTER_X_PITCH;
                uint input_idx = INPUT0_OFFSET + k*INPUT0_FEATURE_PITCH + j*INPUT0_Y_PITCH + i*INPUT0_X_PITCH + batch_id*INPUT0_BATCH_PITCH;
                result += input[input_idx] * weights[weight_idx];
            }
        }
    }

    const uint output_idx = OUTPUT_OFFSET + batch_id*OUTPUT_BATCH_PITCH + neuronIdx*OUTPUT_FEATURE_PITCH;

#if BIAS_TERM
    // Per-output-feature bias, compiled in only when the layer has biases.
    result += biases[neuronIdx];
#endif

    // ACTIVATION applies the JIT-configured activation (e.g. ReLU with
    // NEGATIVE_SLOPE) or the identity when none is requested.
    output[output_idx] = ACTIVATION(result, NL_M, NL_N);
}