inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_1x1_gemm_MMAD.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "convolution_kernel_1x1_gemm_MMAD.h"
  18
  19 namespace kernel_selector {
  20
  21     ParamsKey ConvolutionKernel_1x1_gemm_MMAD::GetSupportedKey() const
  22     {
  23         ParamsKey k;
  24         k.EnableInputDataType(Datatype::INT8);
  25         k.EnableOutputDataType(Datatype::INT8);
  26         k.EnableInputWeightsType(WeightsType::INT8);
  27         k.EnableInputLayout(DataLayout::byxf_af32);
  28         k.EnableOutputLayout(DataLayout::byxf_af32);
  29         k.EnableTensorOffset();
  30         k.EnableTensorPitches();
  31         k.EnableDilation();
  32         k.EnableBiasPerFeature();
  33         k.EnableBiasPerOutput();
  34         k.EnableNonBiasTerm();
  35         k.EnableBatching();
  36         k.EnableSplitSupport();
  37         k.EnableDepthwiseSeparableOpt();
  38         k.EnableInt8Quantization();
  39         k.EnableOutputCalibration();
  40         k.DisableTuning();
  41         return k;
  42     }
  43
  44     bool ConvolutionKernel_1x1_gemm_MMAD::Validate(const Params& p, const optional_params& o) const
  45     {
  46         if (!ConvolutionKernelBase::Validate(p, o))
  47         {
  48             return false;
  49         }
  50
  51         const auto& params = static_cast<const convolution_params&>(p);
  52
  53         if (params.filterSize.x != 1 || params.filterSize.y != 1)
  54             return false;
  55
  56         if (params.stride.x != 1 || params.stride.y != 1)
  57             return false;
  58
  59         if (params.padding.x != 0 || params.padding.y != 0)
  60             return false;
  61
  62         const auto& input = params.inputs[0];
  63
  64         // we do not support padded input
  65         if (input.X().pad.Total() != 0 || input.Y().pad.Total() != 0)
  66             return false;
  67
  68         if (params.split != 1)
  69             return false;
  70
  71         return true;
  72     }
  73
  74     ConvolutionKernelBase::DispatchData ConvolutionKernel_1x1_gemm_MMAD::SetDefault(const convolution_params& arg, int) const
  75     {
  76         DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
  77
  78         // Sub-group size used by "convolution_1x1_gemm_MMAD" kernel.
  79         constexpr size_t sub_group_size = 8;
  80
  81         const auto of_maps = arg.output.Feature().v;
  82         const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
  83
  84         runInfo.effiency = FORCE_PRIORITY_2;
  85
  86         runInfo.gws0 = RoundUp(arg.output.X().v * arg.output.Y().v, 8) / 8;
  87         runInfo.gws1 = of_threads_per_batch * arg.output.Batch().v;
  88         runInfo.gws2 = 1;
  89
  90         runInfo.lws0 = 1;
  91         runInfo.lws1 = sub_group_size;
  92         runInfo.lws2 = 1;
  93
  94         return runInfo;
  95     }
  96
  97     JitConstants ConvolutionKernel_1x1_gemm_MMAD::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
  98     {
  99         auto jit = Parent::GetJitConstants(params, runInfo);
 100
 101         jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1));
 102
 103         // pitch for special block format used in this kernel
 104         const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
 105         const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
 106         jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
 107
 108         return jit;
 109     }
 110
 111     KernelsData ConvolutionKernel_1x1_gemm_MMAD::GetKernelsData(const Params& params, const optional_params& options) const
 112     {
 113         return GetTunedKernelsDataByIndex(params, options);
 114     }
 115 }