Publishing R3
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / actual_kernels / reorder / reorder_kernel_base.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "kernel_selector_common.h"
18 #include "reorder_kernel_base.h"
19 #include "common_tools.h"
20 #include "kernel_selector_utils.h" 
21
22 namespace kernel_selector 
23 {
24     inline uint32_t SubGroupSize(WeightsLayout l)
25     {
26         switch (l)
27         {
28         case WeightsLayout::os_iyx_osv16:
29         case WeightsLayout::os_iyx_osv16_rotate_180:
30         case WeightsLayout::os_i_osv16:
31         case WeightsLayout::os_i_osv16__ai8:
32         case WeightsLayout::i_yxs_os_yxsv2_osv16:
33         case WeightsLayout::iy_xs_os_xsv2_osv16__ao32:
34             return 16;
35         case WeightsLayout::os_i_osv8__ai8:
36         case WeightsLayout::iy_xs_os_xsv2_osv8__ao32:
37             return 8;
38         default:
39             return 1;
40         }
41     }
42
43     inline uint32_t SubGroupSize(DataLayout l)
44     {
45         switch (l)
46         {
47         case DataLayout::bs_f_bsv16__af8:
48             return 16;
49         case DataLayout::bs_f_bsv8__af8:
50             return 8;
51         default:
52             return 1;
53         }
54     }
55
56     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
57     // MakeReorderWeightsJitConstants
58     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
59     inline JitConstants MakeReorderWeightsJitConstants(const reorder_weights_params& params)
60     {
61         const auto& input = params.input;
62         const auto& output = params.output;
63         const bool fp16Supported = output.GetDType() == WeightsType::F16 || input.GetDType() == WeightsType::F16;
64
65         JitConstants jit{
66             MakeJitConstant("FP16_SUPPORTED",   fp16Supported),                      // TODO: use engine
67             MakeJitConstant("FP16_UNIT_USED",   fp16Supported),
68             MakeJitConstant("INPUT0",           input),
69             MakeJitConstant("OUTPUT",           output),
70         };
71
72         return jit;
73     }
74
75     JitConstants ReorderKernelBase::GetJitConstants(const reorder_weights_params& params) const
76     {
77         JitConstants mem_consts = MakeReorderWeightsJitConstants(params);
78        
79         mem_consts.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", SubGroupSize(params.output.GetLayout())));
80
81         return mem_consts;
82     }
83
84     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
85     // MakeReorderJitConstants
86     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
87     inline JitConstants MakeReorderJitConstants(const reorder_params& params)
88     {
89         JitConstants jit = MakeBaseParamsJitConstants(params);
90
91         jit.AddConstant(MakeJitConstant("MEAN_SUBTRACT_" + toString(params.mode), 1));
92
93         if (params.mode == MeanSubtractMode::INSIDE_PARAMS)
94         {
95             jit.AddConstant(MakeJitConstant("VALUE_TO_SUBTRACT", params.meanValues));
96             jit.AddConstant(MakeJitConstant("TO_MEAN_TYPE", "convert_float"));
97         }
98         else if (params.mode == MeanSubtractMode::IN_BUFFER)
99         {
100             jit.AddConstant(MakeJitConstant("MEAN_SUBTRACT", params.mean));
101             jit.AddConstant(MakeJitConstant("TO_MEAN_TYPE", "convert_" + toCLType(params.mean.GetDType())));
102         }
103
104         //half->half without subtraction (so plain reorder) can be done on shorts without explicit fp16 support
105         bool useUshort = (params.inputs[0].GetDType() == Datatype::F16 && params.output.GetDType() == Datatype::F16 &&
106             params.mode == MeanSubtractMode::NONE);
107
108         Datatype calc_type = useUshort ? Datatype::UINT16 : params.inputs[0].GetDType();
109
110         jit.AddConstants({
111             MakeJitConstant("CALC_TYPE",                      toCLType(calc_type)),
112             MakeJitConstant("TO_CALC_TYPE",      "convert_" + toCLType(calc_type)),
113             MakeJitConstant("INPUT_REORDER_TYPE",             useUshort ? toCLType(Datatype::UINT16) : "INPUT0_TYPE"),
114             MakeJitConstant("OUTPUT_REORDER_TYPE",            useUshort ? toCLType(Datatype::UINT16) : "OUTPUT_TYPE"),
115             MakeJitConstant("TO_OUTPUT_REORDER_TYPE",         useUshort ? "" : "TO_OUTPUT_TYPE"),
116             MakeJitConstant("MEAN_OP(val,mean_val)",          getMeanOpString(params.mean_op))
117         });
118
119         return jit;
120     }
121
122     JitConstants ReorderKernelBase::GetJitConstants(const reorder_params& params) const
123     {
124         JitConstants mem_consts = MakeReorderJitConstants(params);
125
126         mem_consts.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", SubGroupSize(params.output.GetLayout())));
127
128         return mem_consts;
129     }
130
131     ReorderKernelBase::DispatchData ReorderKernelBase::SetDefault(const reorder_weights_params& params) const
132     {
133         const auto& out = params.output;
134
135         DispatchData kd;
136
137         std::vector<size_t> global(3);
138
139         global = { out.OFM().v, out.IFM().v, out.X().v*out.Y().v };
140         auto local = GetOptimalLocalWorkGroupSizes(global);
141
142         kd.gws0 = global[0];
143         kd.gws1 = global[1];
144         kd.gws2 = global[2];
145
146         kd.lws0 = local[0];
147         kd.lws1 = local[1];
148         kd.lws2 = local[2];
149
150         return kd;
151     }
152
153     ReorderKernelBase::DispatchData ReorderKernelBase::SetDefault(const reorder_params& params) const
154     {
155         DispatchData kd;
156
157         auto global = GetTensorFriendlyWorkGroups(params.inputs[0]);
158         auto local = GetOptimalLocalWorkGroupSizes(global);
159
160         kd.gws0 = global[0];
161         kd.gws1 = global[1];
162         kd.gws2 = global[2];
163
164         kd.lws0 = local[0];
165         kd.lws1 = local[1];
166         kd.lws2 = local[2];
167
168         return kd;
169     }
170
171     KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_weights_params& params, const optional_params& options, float estimated_time) const
172     {
173         assert(params.GetType() == KernelType::REORDER);
174
175         KernelData kd = KernelData::Default<reorder_weights_params>(params);
176         reorder_weights_params& newParams = *static_cast<reorder_weights_params*>(kd.params.get());
177
178         DispatchData runInfo;
179
180         runInfo = SetDefault(newParams);
181
182         auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
183         auto cldnn_jit = GetJitConstants(newParams);
184         std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
185
186         auto& kernel = kd.kernels[0];
187         
188         FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
189
190         kernel.arguments = GetArgsDesc(1, false, false);
191
192         kd.estimatedTime = estimated_time;
193
194         return{ kd };
195     }
196
197     KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_params& params, const optional_params& options, float estimated_time) const
198     {
199         if (!Validate(params, options))
200         {
201             return{};
202         }
203         assert(params.GetType() == KernelType::REORDER);
204
205         KernelData kd = KernelData::Default<reorder_params>(params);
206         reorder_params& newParams = *static_cast<reorder_params*>(kd.params.get());
207
208         DispatchData runInfo;
209
210         runInfo = SetDefault(newParams);
211
212         auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
213         auto cldnn_jit = GetJitConstants(newParams);
214         std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
215
216         auto& kernel = kd.kernels[0];
217
218         FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
219
220         kernel.arguments = GetArgsDesc(1, false, false);
221         if (newParams.mode == MeanSubtractMode::IN_BUFFER)
222         {
223             kernel.arguments.push_back({ ArgumentDescriptor::Types::BIAS, 0 });
224         }
225
226         kd.estimatedTime = estimated_time;
227
228         return{ kd };
229     }
230 }