inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_fb_io_b8_f8.cl

   1 // Copyright (c) 2016-2017 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15
  16 #include "include/include_all.cl"
  17 #include "include/sub_group.cl"
  18
  19 __attribute__((reqd_work_group_size(8, 1, 1)))
  20 KERNEL (fully_connected_gpu_xb_xb_b8_x8)(
  21     const __global float* input,
  22     __global float* output,
  23     const __global float* weight
  24 #if BIAS_TERM
  25     , __global UNIT_TYPE* bias)
  26 #else
  27     )
  28 #endif
  29 {
  30     const uint global_id = get_global_id(0);
  31     const int x = get_global_id(0);
  32     const uint batch_id = x % INPUT0_BATCH_NUM;
  33
  34     uint neuronIdx = (x / INPUT0_BATCH_NUM) * NEURONS_PER_WORK_ITEM;
  35
  36     const uint sub_group_id = get_local_id(0);
  37     const uint batch_num = INPUT0_BATCH_NUM;
  38
  39     const int out_id = (global_id / batch_num) * NEURONS_PER_WORK_ITEM * batch_num + batch_id;
  40
  41     const int ofm_offset = (global_id * NEURONS_PER_WORK_ITEM) / batch_num;
  42
  43     float8 _data0 = 0.f;
  44 #if NEURONS_PER_WORK_ITEM > 8
  45     float8 _data1 = 0.f;
  46 #endif
  47
  48     uint weight_offset = sub_group_id + neuronIdx;
  49
  50     for(uint h = 0; h < INPUT0_ELEMENTS_COUNT; h++)
  51     {
  52         DOT_PRODUCT_8(_data0, input[h * batch_num + batch_id], weight[weight_offset])
  53 #if NEURONS_PER_WORK_ITEM > 8
  54         DOT_PRODUCT_8(_data1, input[h * batch_num + batch_id], weight[weight_offset + 8])
  55 #endif
  56         weight_offset+= FILTER_OFM_NUM;
  57     }
  58
  59 #if BIAS_TERM
  60     ADD_BIAS_8(_data0, bias[neuronIdx + sub_group_id]);
  61 #if NEURONS_PER_WORK_ITEM > 8
  62     ADD_BIAS_8(_data1, bias[neuronIdx + sub_group_id + 8]);
  63 #endif
  64 #endif
  65     _data0 = ACTIVATION(_data0, NL_M, NL_N);
  66 #if NEURONS_PER_WORK_ITEM > 8
  67     _data1 = ACTIVATION(_data1, NL_M, NL_N);
  68 #endif
  69
  70     intel_sub_group_block_write8((__global uint*)output + out_id, as_uint8(_data0));
  71 #if NEURONS_PER_WORK_ITEM > 8
  72     intel_sub_group_block_write8((__global uint*)output + out_id + 8 * batch_num, as_uint8(_data1));
  73 #endif
  74 }