// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "kernel_base.h"
#include "kernel_selector_common.h"
#include "kernel_selector.h"

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <sstream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <vector>
// Uncomment to print the force/deny decisions made while selecting kernels.
// #define ENABLE_ENV_PRINT

// ENV_PRINTF forwards its arguments to printf when ENABLE_ENV_PRINT is defined
// and expands to nothing otherwise, so its arguments are not evaluated at all
// in the default build.
#ifdef ENABLE_ENV_PRINT
#define ENV_PRINTF(...) printf(__VA_ARGS__)
#else
#define ENV_PRINTF(...)
#endif // ENABLE_ENV_PRINT

// When non-zero, GetAutoTuneBestKernel looks the kernel up through
// AutoTuner::LoadKernelOffline whenever tuning is disabled.
#define ENABLE_OFFLINE_TUNING_CACHE 1
35 namespace kernel_selector {
37 AutoTuner kernel_selector_base::autoTuner;
// Returns a copy of `str` with leading and trailing space characters (' ') removed.
// Only the space character is trimmed; tabs and other whitespace are kept.
// An empty or all-space input yields an empty string.
std::string strip(const std::string str)
{
    const size_t start = str.find_first_not_of(' ');
    if (start == std::string::npos)
    {
        // Nothing but spaces (or nothing at all): there is no content to keep.
        return std::string();
    }

    // At least one non-space character exists, so this search cannot fail.
    const size_t end = str.find_last_not_of(' ');
    return str.substr(start, end - start + 1);
}
53 static void AddToForceMap(ForceList& force_list, bool force_or_deny, const char* env_str)
56 ss.str(GetStringEnv(env_str));
58 ENV_PRINTF("ENV: %s = %s\n", env_str, ss.str().c_str());
61 while (std::getline(ss, val, ','))
63 std::string kernel_name = strip(val);
64 if (!kernel_name.empty())
66 force_list[kernel_name] = force_or_deny;
72 kernel_selector_base::kernel_selector_base()
75 AddToForceMap(forceKernels, true, "CL_DNN_FORCE_KERNELS");
76 AddToForceMap(forceKernels, false, "CL_DNN_DENY_KERNELS");
80 KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params, const optional_params& options, KernelType kType) const
82 KernelsData kernelsData;
83 std::string kernelName;
85 if (params.GetType() == kType &&
86 options.GetType() == kType)
88 for (const auto& implementation : implementations)
90 if (implementation->Supports(params, options))
94 KernelsData kds = implementation->GetKernelsData(params, options);
96 if (kds.size() && kds[0].kernels.size())
99 const auto& it = forceKernels.find(implementation->GetName());
100 if (it != forceKernels.end())
102 if (it->second == true)
104 ENV_PRINTF("Force: %s\n", it->first.c_str());
109 ENV_PRINTF("Deny: %s\n", it->first.c_str());
115 if (kernelsData.size() == 0 ||
116 kds[0].estimatedTime < kernelsData[0].estimatedTime)
119 kernelName = implementation->GetName();
124 catch (std::runtime_error&)
126 // we have to handle it in order to avoid exception in KernelSelector as much we can
132 // TODO: find a better place to located this assignment
133 if (kernelsData.size())
135 //printf("%s\n", kernelName.c_str());
136 kernelsData[0].kernelName = kernelName;
137 kernelsData[0].kernels[0].layerID = params.layerID;
143 KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, const optional_params& options, KernelType kType) const
145 KernelsData kernelsData;
146 std::string kernelName;
147 if (params.GetType() == kType &&
148 options.GetType() == kType)
150 std::string hash = std::to_string(create_hash(params.to_string()));
151 std::tuple<std::string, int> cachedKernelConfig;
152 if (options.tuningParams.mode == TuningMode::TUNING_DISABLED) // Try to load kernel/config from offline cache
154 #if ENABLE_OFFLINE_TUNING_CACHE
155 cachedKernelConfig = autoTuner.LoadKernelOffline(params.engineInfo.deviceCache, hash);
158 return GetNaiveBestKernel(params, options, kType);
161 else // Try to load kernel/config from on-line cache
163 cachedKernelConfig = autoTuner.LoadKernelOnline(options.tuningParams.mode, options.tuningParams.cacheFilePath, params.engineInfo.computeUnitsCount, hash);
165 bool hashFoundInCache = !std::get<0>(cachedKernelConfig).empty();
167 if (hashFoundInCache)
169 std::string cachedkernelName = std::get<0>(cachedKernelConfig);
170 int autoTuneIndex = std::get<1>(cachedKernelConfig);
172 for (const auto& implementation : implementations)
174 // TODO: make sure kernel names are unique.
175 if (implementation->GetName().compare(cachedkernelName) == 0)
177 KernelsData kds = implementation->GetTunedKernelsDataByIndex(params, options, autoTuneIndex);
178 if (kds.size() && kds[0].kernels.size() && implementation->Supports(params, options))
181 kernelsData[0].kernelName = cachedkernelName;
182 kernelsData[0].kernels[0].layerID = params.layerID;
188 if (!kernelsData.empty())
194 if( hashFoundInCache || // Cache is not valid - hash exists in cache but kernelsData was empty or kernel doesn't support the required key.
195 (options.tuningParams.mode != TuningMode::TUNING_TUNE_AND_CACHE) || // On-line tuning is not allowed.
196 !options.tuningParams.runner ) // Runner is invalid - can't run on-line tuning
198 // Fall back to the default path.
199 return GetNaiveBestKernel(params, options, kType);
202 // Start on-line tuning
203 assert(options.tuningParams.runner);
205 for (const auto& implementation : implementations)
207 if (implementation->Supports(params, options) && implementation->SupportsTuning())
211 KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options);
212 std::vector<uint64_t> runTimes = options.tuningParams.runner->run_kernels(kds);
214 for (size_t i = 0; i < kds.size(); i++)
216 kds[i].runTime = runTimes[i];
217 if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime)
219 kernelsData = { kds[i] };
220 kernelName = implementation->GetName();
224 catch (std::runtime_error&)
226 // we have to handle it in order to avoid exception in KernelSelector as much we can
231 //try to fallback to reference kernels if no optimized were found during tuning
232 if (!kernelsData.size())
234 for (const auto& implementation : implementations)
237 //this time, check only implementations that have disabled tuning
238 if (implementation->Supports(params, options) && !implementation->SupportsTuning())
242 KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options);
243 std::vector<uint64_t> runTimes = options.tuningParams.runner->run_kernels(kds);
245 for (size_t i = 0; i < kds.size(); i++)
247 kds[i].runTime = runTimes[i];
248 if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime)
250 kernelsData = { kds[i] };
251 kernelName = implementation->GetName();
255 catch (std::runtime_error&)
257 // we have to handle it in order to avoid exception in KernelSelector as much we can
263 if (kernelsData.size())
265 kernelsData[0].kernelName = kernelName;
266 kernelsData[0].kernels[0].layerID = params.layerID;
267 autoTuner.StoreKernel(options.tuningParams.cacheFilePath, hash, kernelName, kernelsData[0].autoTuneIndex, params.engineInfo.computeUnitsCount);