Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / kernels_cache.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include "kernels_cache.h"
19 #include "ocl_toolkit.h"
20 #include <algorithm>
21 #include <cassert>
22 #include <sstream>
23 #include <fstream>
24 #include <set>
25
26 #include "kernel_selector_helper.h"
27
28 #define MAX_KERNELS_PER_PROGRAM 10
29
30 namespace cldnn { namespace gpu {
31
32 namespace {
33     std::string get_undef_jit(kernels_cache::source_code org_source_code)
34     {
35         const std::string white_space_with_new_lines = " \t\r\n";
36         const std::string white_space = " \t";
37
38         size_t current_pos = 0;
39
40         const std::string define = "define";
41
42         std::set<std::string> to_undef;
43         for (const auto& source : org_source_code)
44         {
45             do
46             {
47                 size_t index_to_hash = source.find_first_not_of(white_space_with_new_lines, current_pos);
48                 if (index_to_hash != std::string::npos &&
49                     source[index_to_hash] == '#')
50                 {
51                     size_t index_define = source.find_first_not_of(white_space, index_to_hash + 1);
52
53                     if (index_define != std::string::npos &&
54                         !source.compare(index_define, define.size(), define))
55                     {
56                         size_t index_to_name = source.find_first_not_of(white_space, index_define + define.size());
57                         if (index_to_name != std::string::npos)
58                         {
59                             size_t index_to_end_name = source.find_first_of(white_space_with_new_lines + "(", index_to_name);
60                             if (index_to_end_name == std::string::npos)
61                             {
62                                 index_to_end_name = source.size();
63                             }
64                             std::string name = source.substr(index_to_name, index_to_end_name - index_to_name);
65                             to_undef.insert(name);
66                         }
67                     }
68                 }
69
70                 current_pos = source.find_first_of('\n', current_pos + 1);
71             } while (current_pos != std::string::npos);
72         }
73
74         std::string undefs;
75         for (const auto& name : to_undef)
76         {
77             undefs += "#ifdef " + name + "\n";
78             undefs += "#undef " + name + "\n";
79             undefs += "#endif\n";
80         }
81
82         return std::move(undefs);
83     }
84
85     std::string reorder_options(const std::string& org_options)
86     {
87         std::stringstream ss(org_options);
88         std::set<std::string> sorted_options;
89
90         while (ss.good())
91         {
92             std::string word;
93             ss >> word;
94             sorted_options.insert(word);
95         }
96
97         std::string options;
98
99         for (const auto& o : sorted_options)
100         {
101             options += o + " ";
102         }
103         
104         return options;
105     }
106
107     inline bool does_options_support_batch_compilation(const std::string& options)
108     {
109         return
110             options.find("-D") == std::string::npos &&
111             options.find("-I") == std::string::npos;
112     }
113 }
114
115 kernels_cache::sorted_code kernels_cache::get_program_source(const kernels_code& kernels_source_code) const 
116 {
117     sorted_code scode;
118
119     for (const auto& code : kernels_source_code)
120     {
121         const source_code   org_source_code     = { code.second.kernel_strings->jit, code.second.kernel_strings->str };
122         std::string         entry_point         = code.second.kernel_strings->entry_point;
123         std::string         options             = code.second.kernel_strings->options;
124         bool                batch_compilation   = code.second.kernel_strings->batch_compilation;
125         bool                dump_custom_program = code.second.dump_custom_program;
126         bool                one_time_kernel     = code.second.one_time_kernel;
127
128         batch_compilation &= does_options_support_batch_compilation(options);
129
130         if (batch_compilation)
131         {
132             options = reorder_options(options);
133         }
134
135         std::string key = options;
136
137         if (batch_compilation == false)
138         {
139             key += " __PROGRAM__" + std::to_string(scode.size());
140         }
141
142         if (dump_custom_program)
143         {
144             key += " __DUMP_CUSTOM_PROGRAM__"; // Adding label to key so it would be separated from other programs
145         }
146
147
148         if (one_time_kernel)
149         {
150             key += " __ONE_TIME__";
151         }
152
153         auto& current_bucket = scode[key];
154         current_bucket.dump_custom_program = dump_custom_program;
155         current_bucket.one_time = one_time_kernel;
156
157         if (current_bucket.source.empty())
158         {
159             current_bucket.options = options;
160         }
161
162         if ((current_bucket.kernels_counter % MAX_KERNELS_PER_PROGRAM) == 0)
163         {
164             current_bucket.source.push_back({});
165         }
166
167         current_bucket.entry_point_to_id[entry_point] = code.second.id;
168
169         source_code new_source_code = org_source_code;
170
171         if (batch_compilation)
172         {
173             new_source_code.push_back(get_undef_jit(org_source_code));
174         }
175
176         for (auto& s : new_source_code)
177         {
178             current_bucket.source.back().push_back(std::move(s));
179         }
180
181         current_bucket.kernels_counter++;
182     }
183
184     return std::move(scode);
185 }
186
187 kernels_cache::kernels_cache(gpu_toolkit& context): _context(context) {}
188
189 kernels_cache::kernel_id kernels_cache::set_kernel_source(const std::shared_ptr<kernel_selector::kernel_string>& kernel_string, bool dump_custom_program, bool one_time_kernel)
190 {
191     kernels_cache::kernel_id id;
192     
193     // same kernel_string == same kernel
194     const auto key = kernel_string.get()->get_hash();
195
196     std::lock_guard<std::mutex> lock(_mutex);
197
198     const auto it = _kernels_code.find(key);
199
200     if (it == _kernels_code.end())
201     {
202         // we need unique id in order to avoid conflict across topologies.
203         const auto kernel_num = _kernels.size() + _kernels_code.size(); 
204         id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
205         _kernels_code[key] = { kernel_string, id, dump_custom_program, one_time_kernel };
206     }
207     else
208     {
209         id = it->second.id;
210     }
211
212     assert(_kernels.find(id) == _kernels.end());
213     _pending_compilation = true;
214     return id;
215 }
216
217 kernels_cache::kernels_map kernels_cache::build_program(const program_code& program_source) const
218 {
219     static uint32_t current_file_index = 0;
220
221     bool dump_sources = !_context.get_configuration().ocl_sources_dumps_dir.empty() || program_source.dump_custom_program;
222
223     std::string dump_file_name = "";
224     if (dump_sources)
225     {
226         dump_file_name = _context.get_configuration().ocl_sources_dumps_dir;
227         if (!dump_file_name.empty() && dump_file_name.back() != '/')
228             dump_file_name += '/';
229
230         dump_file_name += "clDNN_program_" + std::to_string(current_file_index++) + "_part_";
231     }
232
233     try
234     {
235         kernels_map kmap;
236         std::string err_log; //accumulated build log from all program's parts (only contains messages from parts which failed to compile)
237
238         uint32_t part_idx = 0;
239         for (const auto& sources : program_source.source)
240         {
241             auto current_dump_file_name = dump_file_name + std::to_string(part_idx++) + ".cl";
242             std::ofstream dump_file;
243
244             if (dump_sources)
245             {
246                 dump_file.open(current_dump_file_name);
247
248                 if (dump_file.good())
249                 {
250                     for (auto& s : sources)
251                         dump_file << s;
252                 }
253             }
254
255             try
256             {
257                 cl::Program program(_context.context(), sources);
258                 program.build({ _context.device() }, program_source.options.c_str());
259                 ///Store kernels for serialization process.
260                 _context.store_binaries(program.getInfo<CL_PROGRAM_BINARIES>());
261
262                 if (dump_sources && dump_file.good())
263                 {
264                     dump_file << "\n/* Build Log:\n";
265                     for (auto& p : program.getBuildInfo<CL_PROGRAM_BUILD_LOG>())
266                         dump_file << p.second << "\n";
267
268                     dump_file << "*/\n";
269                 }
270
271                 cl::vector<cl::Kernel> kernels;
272                 program.createKernels(&kernels);
273
274                 for (auto& k : kernels)
275                 {
276                     auto kernel_name = k.getInfo<CL_KERNEL_FUNCTION_NAME>();
277                     kmap.emplace(kernel_name, k);
278                 }
279             }
280             catch (const cl::BuildError& err)
281             {
282                 if (dump_sources && dump_file.good())
283                     dump_file << "\n/* Build Log:\n";
284
285                 for (auto& p : err.getBuildLog())
286                 {
287                     if (dump_sources && dump_file.good())
288                         dump_file << p.second << "\n";
289                 
290                     err_log += p.second + '\n';
291                 }
292
293                 if (dump_sources && dump_file.good())
294                     dump_file << "*/\n";
295             }
296             
297         }
298
299         if (!err_log.empty())
300             throw std::runtime_error("Program build failed:\n" + std::move(err_log));
301
302         return kmap;
303     }
304     catch (const cl::Error& err)
305     {
306         throw ocl_error(err);
307     }
308 }
309
310 kernels_cache::kernel_type kernels_cache::get_kernel(kernel_id id, bool one_time_kernel) 
311 {
312     build_all();
313     if (one_time_kernel)
314     {
315         return _one_time_kernels.at(id);
316     }
317     else
318     {
319         return _kernels.at(id);
320     }
321 }
322
323 void kernels_cache::build_all()
324 {
325     if (!_pending_compilation)
326         return;
327
328     std::lock_guard<std::mutex> lock(_mutex);
329
330     auto sorted_program_code = get_program_source(_kernels_code);
331
332     _one_time_kernels.clear();
333     for (auto& program : sorted_program_code)
334     {
335         auto kernels = build_program(program.second);
336
337         for (auto& k : kernels)
338         {
339             const auto& entry_point = k.first;
340             const auto& k_id = program.second.entry_point_to_id[entry_point];
341             if (program.second.one_time)
342             {
343                 _one_time_kernels[k_id] = k.second;
344             }
345             else
346             {
347                 _kernels[k_id] = k.second;
348             }
349         }
350     }
351
352     _kernels_code.clear();
353     _pending_compilation = false;
354 }
355
356 }}
357