// Publishing 2019 R1 content
// [platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / actual_kernels / convolution / convolution_kernel_yxfb_yxio_b8.cpp
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16
17 #include "convolution_kernel_yxfb_yxio_b8.h"
18
19 namespace kernel_selector 
20 {
21
22     ParamsKey ConvolutionKernel_yxfb_yxio_b8::GetSupportedKey() const
23     {
24         ParamsKey k;
25         k.EnableInputDataType(Datatype::F32);
26         k.EnableInputWeightsType(WeightsType::F16);
27         k.EnableInputWeightsType(WeightsType::F32);
28         k.EnableOutputDataType(Datatype::F32);
29         k.EnableInputLayout(DataLayout::yxfb);
30         k.EnableOutputLayout(DataLayout::yxfb);
31         k.EnableTensorOffset();
32         k.EnableTensorPitches();
33         k.EnableBiasPerFeature();
34         k.EnableNonBiasTerm();
35         k.EnableBatching();
36         k.EnableSplitSupport();
37         k.EnableDilation();
38         k.EnableSubGroup();
39         return k;
40     }
41
42     namespace {
43         size_t GetOfmPerWorkitem(size_t filterOfmNum, size_t batchSize, size_t local_work_size)
44         {
45             if (((filterOfmNum * batchSize) / 16) % local_work_size)
46             {
47                 return 8;
48             }
49             else
50             {
51                 return 16;
52             }
53         }
54     }
55
56     ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b8::SetDefault(const convolution_params& arg, int autoTuneIndex) const
57     {
58         DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex);
59
60         const auto filterOfmNum = arg.weights.OFM().v;
61         const auto batchSize = arg.output.Batch().v;
62
63         runInfo.lws0 = batchSize == 8 ? 8 : 16;
64         runInfo.lws1 = 1;
65         runInfo.lws2 = 1;
66
67         size_t ofmPerWorkItem = GetOfmPerWorkitem(filterOfmNum, batchSize, runInfo.lws0);
68
69         runInfo.gws0 = filterOfmNum * batchSize / ofmPerWorkItem;
70
71         runInfo.effiency = FORCE_PRIORITY_9;
72         
73         return runInfo;
74     }
75
76     bool ConvolutionKernel_yxfb_yxio_b8::Validate(const Params& p, const optional_params& o) const
77     {
78         if (!ConvolutionKernelBase::Validate(p, o))
79         {
80             return false;
81         }
82
83         const convolution_params& params = static_cast<const convolution_params&>(p);
84
85         if (!CheckPitchForSplitOnly(params))
86         {
87             return false;
88         }
89
90         const auto filterOfmNum = params.weights.OFM().v;
91         const auto batchSize = params.output.Batch().v;
92
93         const bool bInputValidated =
94             (filterOfmNum > 0) &&
95             (batchSize > 0) &&
96             (params.output.Feature().v == filterOfmNum);
97
98         if (!bInputValidated)
99         {
100             return false;
101         }
102
103         const uint32_t lws0 = batchSize == 8 ? 8 : 16;
104
105         if ((filterOfmNum * batchSize) % lws0 != 0 ||
106             batchSize > 16 || batchSize == 1)
107         {
108             return false;
109         }
110
111         if (params.output.PitchesDifferFromLogicalDims())
112             return false;
113
114         return true;
115     }
116
117     JitConstants ConvolutionKernel_yxfb_yxio_b8::GetJitConstants(const convolution_params& params, const DispatchData& kd) const
118     {
119         JitConstants jits = ConvolutionKernelBase::GetJitConstants(params, kd);
120
121         size_t ofmPerWorkItem = GetOfmPerWorkitem(params.weights.OFM().v, params.output.Batch().v, kd.lws0);
122
123         jits.AddConstant(MakeJitConstant("OFM_PER_WORK_ITEM", ofmPerWorkItem));
124         jits.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0));
125
126         return jits;
127     }
128
129     KernelsData ConvolutionKernel_yxfb_yxio_b8::GetKernelsData(const Params& params, const optional_params& options) const
130     {
131         return GetTunedKernelsDataByIndex(params, options);
132     }
133 }