inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18 #include "kernels_cache.h"
  19 #include "ocl_toolkit.h"
  20 #include <algorithm>
  21 #include <cassert>
  22 #include <sstream>
  23 #include <fstream>
  24 #include <set>
  25
  26 #include "kernel_selector_helper.h"
  27
  28 #define MAX_KERNELS_PER_PROGRAM 10
  29
  30 namespace cldnn { namespace gpu {
  31
  32 namespace {
  33     std::string get_undef_jit(kernels_cache::source_code org_source_code)
  34     {
  35         const std::string white_space_with_new_lines = " \t\r\n";
  36         const std::string white_space = " \t";
  37
  38         size_t current_pos = 0;
  39
  40         const std::string define = "define";
  41
  42         std::set<std::string> to_undef;
  43         for (const auto& source : org_source_code)
  44         {
  45             do
  46             {
  47                 size_t index_to_hash = source.find_first_not_of(white_space_with_new_lines, current_pos);
  48                 if (index_to_hash != std::string::npos &&
  49                     source[index_to_hash] == '#')
  50                 {
  51                     size_t index_define = source.find_first_not_of(white_space, index_to_hash + 1);
  52
  53                     if (index_define != std::string::npos &&
  54                         !source.compare(index_define, define.size(), define))
  55                     {
  56                         size_t index_to_name = source.find_first_not_of(white_space, index_define + define.size());
  57                         if (index_to_name != std::string::npos)
  58                         {
  59                             size_t index_to_end_name = source.find_first_of(white_space_with_new_lines + "(", index_to_name);
  60                             if (index_to_end_name == std::string::npos)
  61                             {
  62                                 index_to_end_name = source.size();
  63                             }
  64                             std::string name = source.substr(index_to_name, index_to_end_name - index_to_name);
  65                             to_undef.insert(name);
  66                         }
  67                     }
  68                 }
  69
  70                 current_pos = source.find_first_of('\n', current_pos + 1);
  71             } while (current_pos != std::string::npos);
  72         }
  73
  74         std::string undefs;
  75         for (const auto& name : to_undef)
  76         {
  77             undefs += "#ifdef " + name + "\n";
  78             undefs += "#undef " + name + "\n";
  79             undefs += "#endif\n";
  80         }
  81
  82         return std::move(undefs);
  83     }
  84
  85     std::string reorder_options(const std::string& org_options)
  86     {
  87         std::stringstream ss(org_options);
  88         std::set<std::string> sorted_options;
  89
  90         while (ss.good())
  91         {
  92             std::string word;
  93             ss >> word;
  94             sorted_options.insert(word);
  95         }
  96
  97         std::string options;
  98
  99         for (const auto& o : sorted_options)
 100         {
 101             options += o + " ";
 102         }
 103
 104         return options;
 105     }
 106
 107     inline bool does_options_support_batch_compilation(const std::string& options)
 108     {
 109         return
 110             options.find("-D") == std::string::npos &&
 111             options.find("-I") == std::string::npos;
 112     }
 113 }
 114
 115 kernels_cache::sorted_code kernels_cache::get_program_source(const kernels_code& kernels_source_code) const
 116 {
 117     sorted_code scode;
 118
 119     for (const auto& code : kernels_source_code)
 120     {
 121         const source_code   org_source_code     = { code.second.kernel_strings->jit, code.second.kernel_strings->str };
 122         std::string         entry_point         = code.second.kernel_strings->entry_point;
 123         std::string         options             = code.second.kernel_strings->options;
 124         bool                batch_compilation   = code.second.kernel_strings->batch_compilation;
 125         bool                dump_custom_program = code.second.dump_custom_program;
 126         bool                one_time_kernel     = code.second.one_time_kernel;
 127
 128         batch_compilation &= does_options_support_batch_compilation(options);
 129
 130         if (batch_compilation)
 131         {
 132             options = reorder_options(options);
 133         }
 134
 135         std::string key = options;
 136
 137         if (batch_compilation == false)
 138         {
 139             key += " __PROGRAM__" + std::to_string(scode.size());
 140         }
 141
 142         if (dump_custom_program)
 143         {
 144             key += " __DUMP_CUSTOM_PROGRAM__"; // Adding label to key so it would be separated from other programs
 145         }
 146
 147
 148         if (one_time_kernel)
 149         {
 150             key += " __ONE_TIME__";
 151         }
 152
 153         auto& current_bucket = scode[key];
 154         current_bucket.dump_custom_program = dump_custom_program;
 155         current_bucket.one_time = one_time_kernel;
 156
 157         if (current_bucket.source.empty())
 158         {
 159             current_bucket.options = options;
 160         }
 161
 162         if ((current_bucket.kernels_counter % MAX_KERNELS_PER_PROGRAM) == 0)
 163         {
 164             current_bucket.source.push_back({});
 165         }
 166
 167         current_bucket.entry_point_to_id[entry_point] = code.second.id;
 168
 169         source_code new_source_code = org_source_code;
 170
 171         if (batch_compilation)
 172         {
 173             new_source_code.push_back(get_undef_jit(org_source_code));
 174         }
 175
 176         for (auto& s : new_source_code)
 177         {
 178             current_bucket.source.back().push_back(std::move(s));
 179         }
 180
 181         current_bucket.kernels_counter++;
 182     }
 183
 184     return std::move(scode);
 185 }
 186
 187 kernels_cache::kernels_cache(gpu_toolkit& context): _context(context) {}
 188
 189 kernels_cache::kernel_id kernels_cache::set_kernel_source(const std::shared_ptr<kernel_selector::kernel_string>& kernel_string, bool dump_custom_program, bool one_time_kernel)
 190 {
 191     kernels_cache::kernel_id id;
 192
 193     // same kernel_string == same kernel
 194     const auto key = kernel_string.get()->get_hash();
 195
 196     std::lock_guard<std::mutex> lock(_mutex);
 197
 198     const auto it = _kernels_code.find(key);
 199
 200     if (it == _kernels_code.end())
 201     {
 202         // we need unique id in order to avoid conflict across topologies.
 203         const auto kernel_num = _kernels.size() + _kernels_code.size();
 204         id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
 205         _kernels_code[key] = { kernel_string, id, dump_custom_program, one_time_kernel };
 206     }
 207     else
 208     {
 209         id = it->second.id;
 210     }
 211
 212     assert(_kernels.find(id) == _kernels.end());
 213     _pending_compilation = true;
 214     return id;
 215 }
 216
 217 kernels_cache::kernels_map kernels_cache::build_program(const program_code& program_source) const
 218 {
 219     static uint32_t current_file_index = 0;
 220
 221     bool dump_sources = !_context.get_configuration().ocl_sources_dumps_dir.empty() || program_source.dump_custom_program;
 222
 223     std::string dump_file_name = "";
 224     if (dump_sources)
 225     {
 226         dump_file_name = _context.get_configuration().ocl_sources_dumps_dir;
 227         if (!dump_file_name.empty() && dump_file_name.back() != '/')
 228             dump_file_name += '/';
 229
 230         dump_file_name += "clDNN_program_" + std::to_string(current_file_index++) + "_part_";
 231     }
 232
 233     try
 234     {
 235         kernels_map kmap;
 236         std::string err_log; //accumulated build log from all program's parts (only contains messages from parts which failed to compile)
 237
 238         uint32_t part_idx = 0;
 239         for (const auto& sources : program_source.source)
 240         {
 241             auto current_dump_file_name = dump_file_name + std::to_string(part_idx++) + ".cl";
 242             std::ofstream dump_file;
 243
 244             if (dump_sources)
 245             {
 246                 dump_file.open(current_dump_file_name);
 247
 248                 if (dump_file.good())
 249                 {
 250                     for (auto& s : sources)
 251                         dump_file << s;
 252                 }
 253             }
 254
 255             try
 256             {
 257                 cl::Program program(_context.context(), sources);
 258                 program.build({ _context.device() }, program_source.options.c_str());
 259                 ///Store kernels for serialization process.
 260                 _context.store_binaries(program.getInfo<CL_PROGRAM_BINARIES>());
 261
 262                 if (dump_sources && dump_file.good())
 263                 {
 264                     dump_file << "\n/* Build Log:\n";
 265                     for (auto& p : program.getBuildInfo<CL_PROGRAM_BUILD_LOG>())
 266                         dump_file << p.second << "\n";
 267
 268                     dump_file << "*/\n";
 269                 }
 270
 271                 cl::vector<cl::Kernel> kernels;
 272                 program.createKernels(&kernels);
 273
 274                 for (auto& k : kernels)
 275                 {
 276                     auto kernel_name = k.getInfo<CL_KERNEL_FUNCTION_NAME>();
 277                     kmap.emplace(kernel_name, k);
 278                 }
 279             }
 280             catch (const cl::BuildError& err)
 281             {
 282                 if (dump_sources && dump_file.good())
 283                     dump_file << "\n/* Build Log:\n";
 284
 285                 for (auto& p : err.getBuildLog())
 286                 {
 287                     if (dump_sources && dump_file.good())
 288                         dump_file << p.second << "\n";
 289
 290                     err_log += p.second + '\n';
 291                 }
 292
 293                 if (dump_sources && dump_file.good())
 294                     dump_file << "*/\n";
 295             }
 296
 297         }
 298
 299         if (!err_log.empty())
 300             throw std::runtime_error("Program build failed:\n" + std::move(err_log));
 301
 302         return kmap;
 303     }
 304     catch (const cl::Error& err)
 305     {
 306         throw ocl_error(err);
 307     }
 308 }
 309
 310 kernels_cache::kernel_type kernels_cache::get_kernel(kernel_id id, bool one_time_kernel)
 311 {
 312     build_all();
 313     if (one_time_kernel)
 314     {
 315         return _one_time_kernels.at(id);
 316     }
 317     else
 318     {
 319         return _kernels.at(id);
 320     }
 321 }
 322
 323 void kernels_cache::build_all()
 324 {
 325     if (!_pending_compilation)
 326         return;
 327
 328     std::lock_guard<std::mutex> lock(_mutex);
 329
 330     auto sorted_program_code = get_program_source(_kernels_code);
 331
 332     _one_time_kernels.clear();
 333     for (auto& program : sorted_program_code)
 334     {
 335         auto kernels = build_program(program.second);
 336
 337         for (auto& k : kernels)
 338         {
 339             const auto& entry_point = k.first;
 340             const auto& k_id = program.second.entry_point_to_id[entry_point];
 341             if (program.second.one_time)
 342             {
 343                 _one_time_kernels[k_id] = k.second;
 344             }
 345             else
 346             {
 347                 _kernels[k_id] = k.second;
 348             }
 349         }
 350     }
 351
 352     _kernels_code.clear();
 353     _pending_compilation = false;
 354 }
 355
 356 }}
 357