2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include "kernel_selector_params.h"
// Option strings kept as macros. Both values are "-cl-"-prefixed flags,
// presumably appended to the OpenCL program build options - TODO confirm at the use sites.
30 #define AGE_BASED "-cl-no-subgroup-ifp"
32 #define NO_PRERA_SCH "-cl-intel-no-prera-scheduling"
34 namespace kernel_selector {
// Marks `a` as intentionally unused by evaluating it and discarding the result.
// Both the argument and the whole expansion are parenthesized so that compound
// arguments (e.g. UNUSED(x + y)) and uses inside larger expressions expand as intended;
// the previous form `(void)a` turned UNUSED(x + y) into `(void)x + y`.
#define UNUSED(a) ((void)(a))
41 // TODO: this is the current solution until we have time-based kernel selection.
// Hard-coded float constants in three tiers:
//   - FORCE_PRIORITY_1..9: tiny values increasing in steps of 1e-7;
//   - DONT_USE_IF_HAVE_SOMETHING_ELSE: 1e6 (also the default of KernelData::estimatedTime),
//     with TUTORIAL_PRIORITY defined as that value plus one;
//   - NOT_SUPPORTED: FLT_MAX.
// NOTE(review): presumably a smaller value marks a more preferred kernel - confirm against the selector.
// NOTE(review): NOT_SUPPORTED relies on FLT_MAX (<cfloat>) being visible through other includes.
42 #define FORCE_PRIORITY_1 (0.0000001f)
43 #define FORCE_PRIORITY_2 (0.0000002f)
44 #define FORCE_PRIORITY_3 (0.0000003f)
45 #define FORCE_PRIORITY_4 (0.0000004f)
46 #define FORCE_PRIORITY_5 (0.0000005f)
47 #define FORCE_PRIORITY_6 (0.0000006f)
48 #define FORCE_PRIORITY_7 (0.0000007f)
49 #define FORCE_PRIORITY_8 (0.0000008f)
50 #define FORCE_PRIORITY_9 (0.0000009f)
51 #define DONT_USE_IF_HAVE_SOMETHING_ELSE (1000000.f)
52 #define TUTORIAL_PRIORITY (DONT_USE_IF_HAVE_SOMETHING_ELSE + 1.f)
53 #define NOT_SUPPORTED (FLT_MAX)
// Declaration only; the definition is not part of this header section.
// Presumably returns the value of the environment variable `varName` as a string -
// TODO confirm, including what is returned when the variable is not set.
55 std::string GetStringEnv(const char* varName);
57 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
59 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
65 std::string entry_point;
66 bool batch_compilation;
70 options(""), entry_point(""),
71 batch_compilation(false)
// Returns str, jit, options and entry_point concatenated in that order with no
// separators; batch_compilation does not take part in the result.
// NOTE(review): the visible body only reads members, yet the method is not declared const.
74 std::string get_hash()
76 return str + jit + options + entry_point;
80 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
82 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
85 std::vector<size_t> global;
86 std::vector<size_t> local;
89 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
91 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
92 struct ScalarDescriptor
126 using Scalars = std::vector<ScalarDescriptor>;
128 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
129 // ArgumentDescriptor
130 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
131 struct ArgumentDescriptor
139 PREV_WEIGHTS_GRADIENT,
146 WEIGHTS_QUANTIZATION_FACTORS,
147 OUTPUT_CALIBRATION_FACTORS,
148 RECURRENT, // RNN/LSTM/GRU recurrent weights
149 HIDDEN, // RNN/LSTM/GRU hidden input
150 CELL, // LSTM cell input
151 LSTM_PACK, // LSTM packed output
155 enum class ScalarTypes
173 using Arguments = std::vector<ArgumentDescriptor>;
175 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
177 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
180 std::shared_ptr<KernelString> kernelString;
181 WorkGroupSizes workGroups;
184 std::string layerID; // TODO: in order to support run single layer. think about more appropriate place
187 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
189 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Pure virtual: returns the WeightsType the implementation expects for its input (per the name).
// NOTE(review): unlike the two members below, this one is not const-qualified.
192 virtual WeightsType GetExpectedInputType() = 0;
// Returns WeightsLayout::oiyx unless a subclass overrides it.
193 virtual WeightsLayout GetExpectedInputLayout() const { return WeightsLayout::oiyx; }
// Pure virtual entry point taking raw input/output buffers together with their sizes.
// NOTE(review): whether the sizes are in bytes or in elements is not visible here - confirm.
194 virtual void Execute(void* input, size_t input_size, void* output, size_t output_size) const = 0;
197 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
198 // GenericKernelParams
199 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
200 struct GenericKernelParams
209 Engine engine = Engine::NONE;
210 std::shared_ptr<clKernelData> clKernel;
211 std::shared_ptr<CPUKernel> cpuKernel;
214 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
215 // WeightsReorderParams
216 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
217 struct WeightsReorderParams : public GenericKernelParams
219 size_t newBufferSize = 0;
220 WeightsType dtype = WeightsType::F16;
221 WeightsLayout destLayout = WeightsLayout::oiyx;
222 bool toImageType = false;
225 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
227 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
230 std::shared_ptr<Params> params;
231 std::vector<clKernelData> kernels;
232 std::vector<size_t> internalBufferSizes;
233 float estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
234 uint64_t runTime = std::numeric_limits<uint64_t>::max(); // kernel run time in nanoseconds
236 bool reorderInput = false;
237 WeightsReorderParams weightsReorderParams;
238 std::string kernelName;
240 int autoTuneIndex = -1;
// Factory helper that fills a KernelData (`kd`) with its default state.
//   T           - concrete parameter type that `_params` is cast to.
//   _params     - parameters to copy; kd.params receives its own std::make_shared<T> copy.
//   kernel_nums - number of entries kd.kernels is resized to (default 1).
// The remaining assignments repeat the in-class member defaults of KernelData.
242 template <typename T>
243 inline static KernelData Default(const Params& _params, size_t kernel_nums = 1)
// Unchecked downcast: the caller must pass a Params object that really is a T.
246 const T& orgParams = static_cast<const T&>(_params);
247 kd.params = std::make_shared<T>(orgParams);
248 kd.kernels.resize(kernel_nums);
249 kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; // for KW
250 kd.runTime = std::numeric_limits<uint64_t>::max();
251 kd.reorderInput = false; // for KW
252 kd.autoTuneIndex = -1;
257 using KernelsData = std::vector<KernelData>;
259 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
260 // to string functions
261 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
262 std::string toString(ActivationFunction activation);
263 std::string toString(DataLayout l);
264 std::string toString(Datatype dType);
265 std::string toString(WeightsType wType);
266 std::string toString(KernelType kt);
267 std::string toString(EltwiseMode b_mode);
268 std::string toString(ReorderMode mode);
269 std::string toString(MeanSubtractMode mode);
270 std::string toString(ArgMaxMinOut mode);
271 std::string toString(ArgMaxMinAxis mode);
272 std::string toString(LookUpTableAxis mode);
273 std::string toString(PoolType mode);
274 std::string toString(LRNMode mode);
275 std::string toString(KernelDividerMode mode);
276 std::string toString(SoftmaxDim d);
277 std::string toString(NormalizeMode mode);
278 std::string toString(MVNMode mode);
279 std::string toString(WeightsLayout layout);
280 std::string toString(ConcatAxis a);
281 std::string toString(TileAxis a);
282 std::string toString(GatherAxis a);
283 std::string toString(SampleType type);
284 std::string toString(const BorderType type);
285 std::string toString(const Tensor::Dim& dim);
286 std::string toString(const DataTensor& tensor);
287 std::string toString(const IndexSelectAxis& axis);
// Hashes the byte range [begin, end) into a 64-bit value.
// The accumulator starts at start_acc and each byte of the range is XOR-ed into it.
// start_acc and mul_factor equal the 64-bit FNV offset basis and FNV prime.
// NOTE(review): the statement that applies mul_factor is not visible in this excerpt,
// so the exact FNV variant cannot be confirmed from here.
288 inline std::uint64_t create_hash(const unsigned char* begin, const unsigned char* end)
290 // Compatible with VS std::hash.
291 constexpr auto start_acc = static_cast<std::uint64_t>(UINT64_C(14695981039346656037));
292 constexpr auto mul_factor = static_cast<std::uint64_t>(UINT64_C(1099511628211));
294 std::uint64_t acc = start_acc;
295 for (auto elem_it = begin; elem_it != end; ++elem_it)
297 acc ^= static_cast<std::uint64_t>(*elem_it);
// Hashes `size` consecutive ElemTy objects starting at `begin` by forwarding their raw
// bytes to the byte-range overload. `begin + size` advances in whole elements, so the
// hashed range covers size * sizeof(ElemTy) bytes.
// NOTE(review): the object representation is hashed as-is, so any padding bytes inside
// ElemTy would take part in the result.
304 template <typename ElemTy>
305 std::uint64_t create_hash(const ElemTy* begin, const std::size_t size)
307 return create_hash(reinterpret_cast<const unsigned char*>(begin), reinterpret_cast<const unsigned char*>(begin + size));
// Hashes the contents of a std::basic_string by forwarding value.data() and
// value.size() (a character count) to the pointer/size overload with ElemTy = CharTy.
310 template <typename CharTy, typename CharTraits, typename AllocatorTy>
311 std::uint64_t create_hash(const std::basic_string<CharTy, CharTraits, AllocatorTy>& value)
313 return create_hash<CharTy>(value.data(), value.size());