Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / kernel_selector.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "kernel_base.h"
18 #include "kernel_selector_common.h"
19 #include "kernel_selector.h"
20 #include <type_traits>
21 #include <sstream>
22 #include <fstream>
23
// Compile-time switches for this translation unit.
//
// ENABLE_ENV       - when defined, compiles in strip()/AddToForceMap() and the
//                    CL_DNN_FORCE_KERNELS / CL_DNN_DENY_KERNELS handling used by
//                    the constructor and GetNaiveBestKernel(). Disabled by default.
// ENABLE_ENV_PRINT - when defined, ENV_PRINTF forwards its arguments to printf;
//                    otherwise ENV_PRINTF expands to nothing. Disabled by default.
// #define ENABLE_ENV
// #define ENABLE_ENV_PRINT

#ifdef ENABLE_ENV_PRINT
#define ENV_PRINTF(...) printf(__VA_ARGS__)
#else
#define ENV_PRINTF(...) 
#endif // ENABLE_ENV_PRINT

// When non-zero, GetAutoTuneBestKernel() consults the offline tuning cache while
// tuning is disabled; when zero it goes straight to GetNaiveBestKernel() instead.
#define ENABLE_OFFLINE_TUNING_CACHE 1
34
35 namespace kernel_selector {
36
    // Out-of-class definition of the static AutoTuner member of kernel_selector_base.
    // GetAutoTuneBestKernel() uses it to load cached kernel choices and to store new ones.
    AutoTuner kernel_selector_base::autoTuner;
38
39 #ifdef ENABLE_ENV
40     std::string strip(const std::string str)
41     {
42         size_t start = str.find_first_not_of(' ');
43         size_t end = str.find_last_not_of(' ');
44         if (start == std::string::npos ||
45             end == std::string::npos)
46         {
47             return "";
48         }
49
50         return str.substr(start, end - start + 1);
51     }
52
53     static void AddToForceMap(ForceList& force_list, bool force_or_deny, const char* env_str)
54     {
55         std::stringstream ss;
56         ss.str(GetStringEnv(env_str));
57
58         ENV_PRINTF("ENV: %s = %s\n", env_str, ss.str().c_str());
59
60         std::string val;
61         while (std::getline(ss, val, ','))
62         {
63             std::string kernel_name = strip(val);
64             if (!kernel_name.empty())
65             {
66                 force_list[kernel_name] = force_or_deny;
67             }
68         }
69     }
70 #endif
71
72     kernel_selector_base::kernel_selector_base()
73     {
74 #ifdef ENABLE_ENV
75         AddToForceMap(forceKernels, true, "CL_DNN_FORCE_KERNELS");
76         AddToForceMap(forceKernels, false, "CL_DNN_DENY_KERNELS");
77 #endif
78     }
79
80     KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params, const optional_params& options, KernelType kType) const
81     {
82         KernelsData kernelsData;
83         std::string kernelName;
84
85         if (params.GetType() == kType &&
86             options.GetType() == kType)
87         {
88             for (const auto& implementation : implementations)
89             {
90                 if (implementation->Supports(params, options))
91                 {
92                     try
93                     {
94                         KernelsData kds = implementation->GetKernelsData(params, options);
95
96                         if (kds.size() && kds[0].kernels.size())
97                         {
98 #ifdef ENABLE_ENV
99                             const auto& it = forceKernels.find(implementation->GetName());
100                             if (it != forceKernels.end())
101                             {
102                                 if (it->second == true)
103                                 {
104                                     ENV_PRINTF("Force: %s\n", it->first.c_str());
105                                     return kds;
106                                 }
107                                 else
108                                 {
109                                     ENV_PRINTF("Deny: %s\n", it->first.c_str());
110                                 }
111                             }
112                             else
113 #endif
114                             {
115                                 if (kernelsData.size() == 0 ||
116                                     kds[0].estimatedTime < kernelsData[0].estimatedTime)
117                                 {
118                                     kernelsData = kds;
119                                     kernelName = implementation->GetName();
120                                 }
121                             }
122                         }
123                     }
124                     catch (std::runtime_error&)
125                     {
126                         // we have to handle it in order to avoid exception in KernelSelector as much we can
127                     }
128                 }
129             }
130         }
131
132         // TODO: find a better place to located this assignment 
133         if (kernelsData.size())
134         {
135             //printf("%s\n", kernelName.c_str());
136             kernelsData[0].kernelName = kernelName;
137             kernelsData[0].kernels[0].layerID = params.layerID;
138         }
139
140         return kernelsData;
141     }
142
143     KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, const optional_params& options, KernelType kType) const
144     {
145         KernelsData kernelsData;
146         std::string kernelName;
147         if (params.GetType() == kType &&
148             options.GetType() == kType)
149         {
150             std::string hash = std::to_string(create_hash(params.to_string()));
151             std::tuple<std::string, int> cachedKernelConfig;
152             if (options.tuningParams.mode == TuningMode::TUNING_DISABLED) // Try to load kernel/config from offline cache
153             {
154 #if ENABLE_OFFLINE_TUNING_CACHE
155                 cachedKernelConfig = autoTuner.LoadKernelOffline(params.engineInfo.deviceCache, hash);
156                 
157 #else
158                 return  GetNaiveBestKernel(params, options, kType);
159 #endif
160             }
161             else // Try to load kernel/config from on-line cache
162             {
163                 cachedKernelConfig = autoTuner.LoadKernelOnline(options.tuningParams.mode, options.tuningParams.cacheFilePath, params.engineInfo.computeUnitsCount, hash);
164             }       
165             bool hashFoundInCache = !std::get<0>(cachedKernelConfig).empty();
166
167             if (hashFoundInCache)
168             {
169                 std::string cachedkernelName = std::get<0>(cachedKernelConfig);
170                 int autoTuneIndex = std::get<1>(cachedKernelConfig);
171
172                 for (const auto& implementation : implementations)
173                 {
174                     // TODO: make sure kernel names are unique.
175                     if (implementation->GetName().compare(cachedkernelName) == 0)
176                     {            
177                         KernelsData kds = implementation->GetTunedKernelsDataByIndex(params, options, autoTuneIndex);
178                         if (kds.size() && kds[0].kernels.size() && implementation->Supports(params, options))
179                         {
180                             kernelsData = kds;
181                             kernelsData[0].kernelName = cachedkernelName;
182                             kernelsData[0].kernels[0].layerID = params.layerID;
183                         }
184                         break;
185                     }
186                 }
187
188                 if (!kernelsData.empty())
189                 {
190                     return kernelsData;
191                 }
192             }
193
194             if( hashFoundInCache || // Cache is not valid - hash exists in cache but kernelsData was empty or kernel doesn't support the required key.
195                 (options.tuningParams.mode != TuningMode::TUNING_TUNE_AND_CACHE) || // On-line tuning is not allowed.
196                 !options.tuningParams.runner ) // Runner is invalid - can't run on-line tuning
197             {
198                 // Fall back to the default path.
199                 return GetNaiveBestKernel(params, options, kType);
200             }    
201
202             // Start on-line tuning
203             assert(options.tuningParams.runner);
204
205             for (const auto& implementation : implementations)
206             {
207                 if (implementation->Supports(params, options) && implementation->SupportsTuning())
208                 {
209                     try
210                     {
211                         KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options);
212                         std::vector<uint64_t> runTimes = options.tuningParams.runner->run_kernels(kds);
213                         
214                         for (size_t i = 0; i < kds.size(); i++)
215                         {
216                             kds[i].runTime = runTimes[i];
217                             if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime)
218                             {
219                                 kernelsData = { kds[i] };
220                                 kernelName = implementation->GetName();                                
221                             }
222                         }
223                     }
224                     catch (std::runtime_error&)
225                     {
226                         // we have to handle it in order to avoid exception in KernelSelector as much we can
227                     }
228                 }
229             }
230
231             //try to fallback to reference kernels if no optimized were found during tuning
232             if (!kernelsData.size())
233             {
234                 for (const auto& implementation : implementations)
235                 {
236
237                     //this time, check only implementations that have disabled tuning
238                     if (implementation->Supports(params, options) && !implementation->SupportsTuning())
239                     {
240                         try
241                         {
242                             KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options);
243                             std::vector<uint64_t> runTimes = options.tuningParams.runner->run_kernels(kds);
244
245                             for (size_t i = 0; i < kds.size(); i++)
246                             {
247                                 kds[i].runTime = runTimes[i];
248                                 if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime)
249                                 {
250                                     kernelsData = { kds[i] };
251                                     kernelName = implementation->GetName();
252                                 }
253                             }
254                         }
255                         catch (std::runtime_error&)
256                         {
257                             // we have to handle it in order to avoid exception in KernelSelector as much we can
258                         }
259                     }
260                 }
261             }
262
263             if (kernelsData.size())
264             {
265                 kernelsData[0].kernelName = kernelName;
266                 kernelsData[0].kernels[0].layerID = params.layerID;
267                 autoTuner.StoreKernel(options.tuningParams.cacheFilePath, hash, kernelName, kernelsData[0].autoTuneIndex, params.engineInfo.computeUnitsCount);
268             }
269         } 
270
271         return kernelsData;
272     }
273 }