inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "convolution_kernel_yxfb_yxio_b8.h"
  18
  19 namespace kernel_selector
  20 {
  21
  22     ParamsKey ConvolutionKernel_yxfb_yxio_b8::GetSupportedKey() const
  23     {
  24         ParamsKey k;
  25         k.EnableInputDataType(Datatype::F32);
  26         k.EnableInputWeightsType(WeightsType::F16);
  27         k.EnableInputWeightsType(WeightsType::F32);
  28         k.EnableOutputDataType(Datatype::F32);
  29         k.EnableInputLayout(DataLayout::yxfb);
  30         k.EnableOutputLayout(DataLayout::yxfb);
  31         k.EnableTensorOffset();
  32         k.EnableTensorPitches();
  33         k.EnableBiasPerFeature();
  34         k.EnableNonBiasTerm();
  35         k.EnableBatching();
  36         k.EnableSplitSupport();
  37         k.EnableDilation();
  38         k.EnableSubGroup();
  39         return k;
  40     }
  41
  42     namespace {
  43         size_t GetOfmPerWorkitem(size_t filterOfmNum, size_t batchSize, size_t local_work_size)
  44         {
  45             if (((filterOfmNum * batchSize) / 16) % local_work_size)
  46             {
  47                 return 8;
  48             }
  49             else
  50             {
  51                 return 16;
  52             }
  53         }
  54     }
  55
  56     ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b8::SetDefault(const convolution_params& arg, int autoTuneIndex) const
  57     {
  58         DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex);
  59
  60         const auto filterOfmNum = arg.weights.OFM().v;
  61         const auto batchSize = arg.output.Batch().v;
  62
  63         runInfo.lws0 = batchSize == 8 ? 8 : 16;
  64         runInfo.lws1 = 1;
  65         runInfo.lws2 = 1;
  66
  67         size_t ofmPerWorkItem = GetOfmPerWorkitem(filterOfmNum, batchSize, runInfo.lws0);
  68
  69         runInfo.gws0 = filterOfmNum * batchSize / ofmPerWorkItem;
  70
  71         runInfo.effiency = FORCE_PRIORITY_9;
  72
  73         return runInfo;
  74     }
  75
  76     bool ConvolutionKernel_yxfb_yxio_b8::Validate(const Params& p, const optional_params& o) const
  77     {
  78         if (!ConvolutionKernelBase::Validate(p, o))
  79         {
  80             return false;
  81         }
  82
  83         const convolution_params& params = static_cast<const convolution_params&>(p);
  84
  85         if (!CheckPitchForSplitOnly(params))
  86         {
  87             return false;
  88         }
  89
  90         const auto filterOfmNum = params.weights.OFM().v;
  91         const auto batchSize = params.output.Batch().v;
  92
  93         const bool bInputValidated =
  94             (filterOfmNum > 0) &&
  95             (batchSize > 0) &&
  96             (params.output.Feature().v == filterOfmNum);
  97
  98         if (!bInputValidated)
  99         {
 100             return false;
 101         }
 102
 103         const uint32_t lws0 = batchSize == 8 ? 8 : 16;
 104
 105         if ((filterOfmNum * batchSize) % lws0 != 0 ||
 106             batchSize > 16 || batchSize == 1)
 107         {
 108             return false;
 109         }
 110
 111         if (params.output.PitchesDifferFromLogicalDims())
 112             return false;
 113
 114         return true;
 115     }
 116
 117     JitConstants ConvolutionKernel_yxfb_yxio_b8::GetJitConstants(const convolution_params& params, const DispatchData& kd) const
 118     {
 119         JitConstants jits = ConvolutionKernelBase::GetJitConstants(params, kd);
 120
 121         size_t ofmPerWorkItem = GetOfmPerWorkitem(params.weights.OFM().v, params.output.Batch().v, kd.lws0);
 122
 123         jits.AddConstant(MakeJitConstant("OFM_PER_WORK_ITEM", ofmPerWorkItem));
 124         jits.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0));
 125
 126         return jits;
 127     }
 128
 129     KernelsData ConvolutionKernel_yxfb_yxio_b8::GetKernelsData(const Params& params, const optional_params& options) const
 130     {
 131         return GetTunedKernelsDataByIndex(params, options);
 132     }
 133 }