inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "fully_connected_kernel_fb_io_b8_f8.h"
  18 #include "kernel_selector_utils.h"
  19
  20 namespace kernel_selector
  21 {
  22     ParamsKey FullyConnected_fb_io_b8_f8::GetSupportedKey() const
  23     {
  24         ParamsKey k;
  25         k.EnableInputDataType(Datatype::F32);
  26         k.EnableInputDataType(Datatype::F16);
  27         k.EnableOutputDataType(Datatype::F32);
  28         k.EnableOutputDataType(Datatype::F16);
  29         k.EnableInputWeightsType(WeightsType::F32);
  30         k.EnableInputWeightsType(WeightsType::F16);
  31         k.EnableAllInputLayout();
  32         k.EnableOutputLayout(DataLayout::fb);
  33         k.EnableBatching();
  34         k.EnableBiasPerFeature();
  35         k.EnableNonBiasTerm();
  36         k.EnableSubGroup();
  37         return k;
  38     }
  39
  40     std::unique_ptr<FullyConnected_fb_io_b8_f8::DispatchData> FullyConnected_fb_io_b8_f8::SetDefault(const fully_connected_params& arg) const
  41     {
  42         auto kd = FullyConnectedBlockKernelBase::SetDefault(arg);
  43
  44         const auto& output = arg.output;
  45
  46         size_t groups_per_batches = GetLocalGroupsSize(arg);
  47         kd->gws0 = output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches);
  48         kd->gws1 = groups_per_batches;
  49         kd->lws0 = 8;
  50         kd->lws1 = 1;
  51
  52         return std::move(kd);
  53     }
  54
  55     bool FullyConnected_fb_io_b8_f8::Validate(const Params& p, const optional_params& o) const
  56     {
  57         if (!FullyConnectedBlockKernelBase::Validate(p, o))
  58         {
  59             return false;
  60         }
  61
  62         const auto& params = static_cast<const fully_connected_params&>(p);
  63
  64         const auto& output = params.output;
  65         const auto batches = output.Batch().v;
  66         const auto x_size = output.LogicalSize() / batches;
  67
  68         const bool bSupportedBatch = (batches % 8) == 0;
  69         const bool bSupportedFeature = (x_size % 8) == 0;
  70
  71         if (!bSupportedBatch ||
  72             !bSupportedFeature)
  73         {
  74             return false;
  75         }
  76
  77         return true;
  78     }
  79
  80     KernelsData FullyConnected_fb_io_b8_f8::GetKernelsData(const Params& params, const optional_params& optParams) const
  81     {
  82         assert(params.GetType() == KernelType::FULLY_CONNECTED);
  83
  84         const auto& orgParams = static_cast<const fully_connected_params&>(params);
  85
  86         float estimated_time =
  87             orgParams.inputs[0].GetDType() == Datatype::F16 && orgParams.output.Batch().v >= 16 ?
  88             FORCE_PRIORITY_3 : FORCE_PRIORITY_5;
  89
  90         return GetCommonKernelsData(params, optParams, DataLayout::fb, { WeightsLayout::io }, estimated_time);
  91     }
  92 }