2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include "kernels_cache.h"
19 #include "ocl_toolkit.h"
26 #include "kernel_selector_helper.h"
28 #define MAX_KERNELS_PER_PROGRAM 10
30 namespace cldnn { namespace gpu {
33 std::string get_undef_jit(kernels_cache::source_code org_source_code)
35 const std::string white_space_with_new_lines = " \t\r\n";
36 const std::string white_space = " \t";
38 size_t current_pos = 0;
40 const std::string define = "define";
42 std::set<std::string> to_undef;
43 for (const auto& source : org_source_code)
47 size_t index_to_hash = source.find_first_not_of(white_space_with_new_lines, current_pos);
48 if (index_to_hash != std::string::npos &&
49 source[index_to_hash] == '#')
51 size_t index_define = source.find_first_not_of(white_space, index_to_hash + 1);
53 if (index_define != std::string::npos &&
54 !source.compare(index_define, define.size(), define))
56 size_t index_to_name = source.find_first_not_of(white_space, index_define + define.size());
57 if (index_to_name != std::string::npos)
59 size_t index_to_end_name = source.find_first_of(white_space_with_new_lines + "(", index_to_name);
60 if (index_to_end_name == std::string::npos)
62 index_to_end_name = source.size();
64 std::string name = source.substr(index_to_name, index_to_end_name - index_to_name);
65 to_undef.insert(name);
70 current_pos = source.find_first_of('\n', current_pos + 1);
71 } while (current_pos != std::string::npos);
75 for (const auto& name : to_undef)
77 undefs += "#ifdef " + name + "\n";
78 undefs += "#undef " + name + "\n";
82 return std::move(undefs);
// Normalises a compiler option string so that two strings carrying the same
// options in a different order compare equal.
//
// org_options: whitespace-separated options.
// Returns: the distinct options in sorted order, each followed by a single
//          space (so a non-empty result ends with a space); an empty string
//          when org_options holds no options.
//
// Sorting separates an option from an argument written as its own word, so
// this is only suitable for option strings made of self-contained words.
std::string reorder_options(const std::string& org_options)
{
    std::stringstream ss(org_options);
    std::set<std::string> sorted_options;

    // Extraction skips any run of whitespace and stops at the end of input,
    // so leading/trailing whitespace never produces an empty option.
    std::string word;
    while (ss >> word)
    {
        sorted_options.insert(word);
    }

    std::string options;
    for (const auto& o : sorted_options)
    {
        options += o;
        options += ' ';
    }

    return options;
}
// Tells whether kernels built with these options may share a program with
// other kernels.
//
// Returns false as soon as the string contains "-D" or "-I" anywhere,
// true otherwise (including for an empty string).
inline bool does_options_support_batch_compilation(const std::string& options)
{
    return
        options.find("-D") == std::string::npos &&
        options.find("-I") == std::string::npos;
}
115 kernels_cache::sorted_code kernels_cache::get_program_source(const kernels_code& kernels_source_code) const
119 for (const auto& code : kernels_source_code)
121 const source_code org_source_code = { code.second.kernel_strings->jit, code.second.kernel_strings->str };
122 std::string entry_point = code.second.kernel_strings->entry_point;
123 std::string options = code.second.kernel_strings->options;
124 bool batch_compilation = code.second.kernel_strings->batch_compilation;
125 bool dump_custom_program = code.second.dump_custom_program;
126 bool one_time_kernel = code.second.one_time_kernel;
128 batch_compilation &= does_options_support_batch_compilation(options);
130 if (batch_compilation)
132 options = reorder_options(options);
135 std::string key = options;
137 if (batch_compilation == false)
139 key += " __PROGRAM__" + std::to_string(scode.size());
142 if (dump_custom_program)
144 key += " __DUMP_CUSTOM_PROGRAM__"; // Adding label to key so it would be separated from other programs
150 key += " __ONE_TIME__";
153 auto& current_bucket = scode[key];
154 current_bucket.dump_custom_program = dump_custom_program;
155 current_bucket.one_time = one_time_kernel;
157 if (current_bucket.source.empty())
159 current_bucket.options = options;
162 if ((current_bucket.kernels_counter % MAX_KERNELS_PER_PROGRAM) == 0)
164 current_bucket.source.push_back({});
167 current_bucket.entry_point_to_id[entry_point] = code.second.id;
169 source_code new_source_code = org_source_code;
171 if (batch_compilation)
173 new_source_code.push_back(get_undef_jit(org_source_code));
176 for (auto& s : new_source_code)
178 current_bucket.source.back().push_back(std::move(s));
181 current_bucket.kernels_counter++;
184 return std::move(scode);
187 kernels_cache::kernels_cache(gpu_toolkit& context): _context(context) {}
189 kernels_cache::kernel_id kernels_cache::set_kernel_source(const std::shared_ptr<kernel_selector::kernel_string>& kernel_string, bool dump_custom_program, bool one_time_kernel)
191 kernels_cache::kernel_id id;
193 // same kernel_string == same kernel
194 const auto key = kernel_string.get()->get_hash();
196 std::lock_guard<std::mutex> lock(_mutex);
198 const auto it = _kernels_code.find(key);
200 if (it == _kernels_code.end())
202 // we need unique id in order to avoid conflict across topologies.
203 const auto kernel_num = _kernels.size() + _kernels_code.size();
204 id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
205 _kernels_code[key] = { kernel_string, id, dump_custom_program, one_time_kernel };
212 assert(_kernels.find(id) == _kernels.end());
213 _pending_compilation = true;
217 kernels_cache::kernels_map kernels_cache::build_program(const program_code& program_source) const
219 static uint32_t current_file_index = 0;
221 bool dump_sources = !_context.get_configuration().ocl_sources_dumps_dir.empty() || program_source.dump_custom_program;
223 std::string dump_file_name = "";
226 dump_file_name = _context.get_configuration().ocl_sources_dumps_dir;
227 if (!dump_file_name.empty() && dump_file_name.back() != '/')
228 dump_file_name += '/';
230 dump_file_name += "clDNN_program_" + std::to_string(current_file_index++) + "_part_";
236 std::string err_log; //accumulated build log from all program's parts (only contains messages from parts which failed to compile)
238 uint32_t part_idx = 0;
239 for (const auto& sources : program_source.source)
241 auto current_dump_file_name = dump_file_name + std::to_string(part_idx++) + ".cl";
242 std::ofstream dump_file;
246 dump_file.open(current_dump_file_name);
248 if (dump_file.good())
250 for (auto& s : sources)
257 cl::Program program(_context.context(), sources);
258 program.build({ _context.device() }, program_source.options.c_str());
259 ///Store kernels for serialization process.
260 _context.store_binaries(program.getInfo<CL_PROGRAM_BINARIES>());
262 if (dump_sources && dump_file.good())
264 dump_file << "\n/* Build Log:\n";
265 for (auto& p : program.getBuildInfo<CL_PROGRAM_BUILD_LOG>())
266 dump_file << p.second << "\n";
271 cl::vector<cl::Kernel> kernels;
272 program.createKernels(&kernels);
274 for (auto& k : kernels)
276 auto kernel_name = k.getInfo<CL_KERNEL_FUNCTION_NAME>();
277 kmap.emplace(kernel_name, k);
280 catch (const cl::BuildError& err)
282 if (dump_sources && dump_file.good())
283 dump_file << "\n/* Build Log:\n";
285 for (auto& p : err.getBuildLog())
287 if (dump_sources && dump_file.good())
288 dump_file << p.second << "\n";
290 err_log += p.second + '\n';
293 if (dump_sources && dump_file.good())
299 if (!err_log.empty())
300 throw std::runtime_error("Program build failed:\n" + std::move(err_log));
304 catch (const cl::Error& err)
306 throw ocl_error(err);
310 kernels_cache::kernel_type kernels_cache::get_kernel(kernel_id id, bool one_time_kernel)
315 return _one_time_kernels.at(id);
319 return _kernels.at(id);
323 void kernels_cache::build_all()
325 if (!_pending_compilation)
328 std::lock_guard<std::mutex> lock(_mutex);
330 auto sorted_program_code = get_program_source(_kernels_code);
332 _one_time_kernels.clear();
333 for (auto& program : sorted_program_code)
335 auto kernels = build_program(program.second);
337 for (auto& k : kernels)
339 const auto& entry_point = k.first;
340 const auto& k_id = program.second.entry_point_to_id[entry_point];
341 if (program.second.one_time)
343 _one_time_kernels[k_id] = k.second;
347 _kernels[k_id] = k.second;
352 _kernels_code.clear();
353 _pending_compilation = false;