inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.cpp
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include "fully_connected_kernel_bf_io_gemm.h"

namespace kernel_selector {

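    // Capability key used by the kernel selector: this kernel accepts F16/F32
    // inputs, outputs and weights, any input layout, bf output layout, and
    // optional per-output or per-feature bias.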
    ParamsKey FullyConnected_bf_io_GEMM::GetSupportedKey() const
    {
        ParamsKey k;
        k.EnableInputDataType(Datatype::F16);
        k.EnableInputDataType(Datatype::F32);
        k.EnableOutputDataType(Datatype::F16);
        k.EnableOutputDataType(Datatype::F32);
        k.EnableInputWeightsType(WeightsType::F16);
        k.EnableInputWeightsType(WeightsType::F32);
        k.EnableAllInputLayout();
        k.EnableOutputLayout(DataLayout::bf);
        k.EnableBiasPerOutput();
        k.EnableBiasPerFeature();
        k.EnableNonBiasTerm();
        k.EnableTensorOffset();
        k.EnableTensorPitches();
        return k;
    }

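    // Dispatch configuration: a fixed local work-group of 64 items along X,
    // one work-group per output feature along Y; the Z dimension is pinned to 1.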
    FullyConnected_bf_io_GEMM::DispatchData FullyConnected_bf_io_GEMM::SetDefault(const fully_connected_params& params, int autoTuneIndex) const
    {
        auto runInfo = Parent::SetDefault(params, autoTuneIndex);

        const uint32_t localWorkSizeX = 64;
        const uint32_t globalWorkSizeX = localWorkSizeX;

        std::vector<size_t> global = { globalWorkSizeX, params.output.Feature().v, params.output.Batch().v };
        std::vector<size_t> local = { localWorkSizeX, 1, 1 };

        runInfo.gws0 = global[0];
        runInfo.gws1 = global[1];
        runInfo.gws2 = 1;

        runInfo.lws0 = local[0];
        runInfo.lws1 = local[1];
        runInfo.lws2 = 1;

        runInfo.effiency = FORCE_PRIORITY_6;

        return runInfo;
    }

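    // JIT constants: pick the __fc_f16 / __fc_f32 code path from the input
    // precision, and emit the remainders of the input row length modulo the
    // work-group stride (64 work-items x 4-element vectors) and modulo the
    // vector size. Note that LAST_INPUT_SIZE_DIV_4 is a remainder (modulo 4),
    // despite its name.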
    JitConstants FullyConnected_bf_io_GEMM::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const
    {
        auto jit = Parent::GetJitConstants(params, kd);

        if (params.inputs[0].GetDType() == Datatype::F16)
        {
            jit.AddConstant(MakeJitConstant("__fc_f16", ""));
        }
        else
        {
            jit.AddConstant(MakeJitConstant("__fc_f32", ""));
        }

        const uint32_t localWorkSizeX = 64;
        const uint32_t globalWorkSizeX = localWorkSizeX;
        const uint32_t vecSize = 4;
        size_t matrixLineSize = params.inputs[0].Batch().pitch;

        jit.AddConstants({
            MakeJitConstant("LAST_INPUT_SIZE_REMAINDER", matrixLineSize % (globalWorkSizeX * vecSize)),
            MakeJitConstant("LAST_INPUT_SIZE_DIV_4", matrixLineSize % vecSize),
        });

        return jit;
    }

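    // Produces one tuned kernel candidate per auto-tune option, requesting the
    // bf data layout and oiyx weights layout at FORCE_PRIORITY_6.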
    KernelsData FullyConnected_bf_io_GEMM::GetKernelsData(const Params& params, const optional_params& options) const
    {
        KernelsData res = {};
        for (size_t i = 0; i < autoTuneOptions.size(); i++)
        {
            KernelsData kd = GetTunedKernelsDataByIndex(params, options, DataLayout::bf, { WeightsLayout::oiyx }, FORCE_PRIORITY_6, (int)i);
            if (!kd.empty())
            {
                res.emplace_back(kd[0]);
            }
        }

        return res;
    }
}