inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "fully_connected_kernel_fb_io_b8_f8.h"
  18
  19 namespace kernel_selector
  20 {
  21     ParamsKey FullyConnected_fb_io_b8_f8::GetSupportedKey() const
  22     {
  23         ParamsKey k;
  24         k.EnableInputDataType(Datatype::F32);
  25         k.EnableInputDataType(Datatype::F16);
  26         k.EnableOutputDataType(Datatype::F32);
  27         k.EnableOutputDataType(Datatype::F16);
  28         k.EnableInputWeightsType(WeightsType::F32);
  29         k.EnableInputWeightsType(WeightsType::F16);
  30         k.EnableAllInputLayout();
  31         k.EnableOutputLayout(DataLayout::fb);
  32         k.EnableBatching();
  33         k.EnableBiasPerFeature();
  34         k.EnableNonBiasTerm();
  35         k.EnableSubGroup();
  36         return k;
  37     }
  38
  39     FullyConnected_fb_io_b8_f8::DispatchData FullyConnected_fb_io_b8_f8::SetDefault(const fully_connected_params& arg, int ) const
  40     {
  41         auto kd = FullyConnectedBlockKernelBase::SetDefault(arg);
  42
  43         const auto& output = arg.output;
  44
  45         size_t groups_per_batches = GetLocalGroupsSize(arg);
  46         kd.gws0 = Align(output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches), 8);
  47         kd.gws1 = groups_per_batches;
  48         kd.lws0 = 8;
  49         kd.lws1 = 1;
  50
  51         return kd;
  52     }
  53
  54     bool FullyConnected_fb_io_b8_f8::Validate(const Params& p, const optional_params& o) const
  55     {
  56         if (!FullyConnectedBlockKernelBase::Validate(p, o))
  57         {
  58             return false;
  59         }
  60
  61         const auto& params = static_cast<const fully_connected_params&>(p);
  62
  63         const auto& output = params.output;
  64         const auto batches = output.Batch().v;
  65         const auto x_size = output.LogicalSize() / batches;
  66
  67         const auto& input = params.inputs[0];
  68         const auto input_x_size = input.LogicalSize() / input.Batch().v;
  69         const bool proper_input_aligment = (input_x_size % 8) == 0;
  70         const bool proper_output_aligment = (output.LogicalSize() / (GetNeuronsPerWorkItem(params) * GetBatchesPerWorkItem(params) * GetLocalGroupsSize(params)) % 8) == 0;
  71         const bool bSupportedBatch = (batches % 8) == 0;
  72         const bool bSupportedFeature = (x_size % 8) == 0;
  73
  74         if (!bSupportedBatch ||
  75             !bSupportedFeature ||
  76             !proper_input_aligment ||
  77             !proper_output_aligment)
  78         {
  79             return false;
  80         }
  81
  82         return true;
  83     }
  84
  85     KernelsData FullyConnected_fb_io_b8_f8::GetKernelsData(const Params& params, const optional_params& optParams) const
  86     {
  87         assert(params.GetType() == KernelType::FULLY_CONNECTED);
  88         KernelsData res = {};
  89         const auto& orgParams = static_cast<const fully_connected_params&>(params);
  90
  91         float estimated_time =
  92             orgParams.inputs[0].GetDType() == Datatype::F16 && orgParams.output.Batch().v >= 16 ?
  93             FORCE_PRIORITY_3 : FORCE_PRIORITY_5;
  94
  95         for (size_t i = 0; i < autoTuneOptions.size(); i++)
  96         {
  97             KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::fb, { WeightsLayout::io }, estimated_time, (int)i);
  98             if (!kd.empty())
  99             {
 100                 res.emplace_back(kd[0]);
 101             }
 102         }
 103
 104         return res;
 105     }
 106 }