*/
DECLARE_CONFIG_KEY(ENFORCE_BF16);
+/**
+* @brief This key defines the directory which will be used to store any data cached by plugins.
+*
+* This key supports unicode symbols in path
+* The underlying cache structure is not defined and might differ between OpenVINO releases
+* Cached data might be platform/device specific and might be invalid after OpenVINO version change
+* If this key is not specified or value is empty string, then caching is disabled.
+* The key might enable caching for all plugins or some specific ones, e.g.:
+* ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "cache/"}}) - enables cache for all plugins that might want to use it
+* ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "cache/"}}, {"GPU"}) - enables cache only for GPU plugin
+*/
+DECLARE_CONFIG_KEY(CACHE_DIR);
+
} // namespace PluginConfigParams
} // namespace InferenceEngine
#include "cldnn_config.h"
#include "cpp_interfaces/exception2status.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
+#include "ie_api.h"
+#include "file_utils.h"
#ifdef _WIN32
# include <direct.h>
+#ifdef ENABLE_UNICODE_PATH_SUPPORT
+# define mkdir(dir, mode) _wmkdir(dir)
+#else
# define mkdir(dir, mode) _mkdir(dir)
-#endif
+#endif // ENABLE_UNICODE_PATH_SUPPORT
+#endif // _WIN32
using namespace InferenceEngine;
namespace CLDNNPlugin {
+// Creates a single directory at |_path| (mode 0755 on POSIX, mode ignored on
+// Windows). With ENABLE_UNICODE_PATH_SUPPORT on Windows the UTF-8 path is
+// converted to a wide string first (see FileUtils::multiByteCharToWString)
+// so non-ASCII cache directories work.
+// Succeeds silently if the directory already exists (errno == EEXIST);
+// throws on any other failure. Note: only the final path component is
+// created - parent directories must already exist.
+static void createDirectory(std::string _path) {
+#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+    std::wstring widepath = FileUtils::multiByteCharToWString(_path.c_str());
+    const wchar_t* path = widepath.c_str();
+#else
+    const char* path = _path.c_str();
+#endif
+
+    auto err = mkdir(path, 0755);
+    if (err != 0 && errno != EEXIST) {
+        THROW_IE_EXCEPTION << "Couldn't create directory! (err=" << err << "; errno=" << errno << ")";
+    }
+}
+
void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap) {
for (auto& kvp : configMap) {
std::string key = kvp.first;
} else if (key.compare(CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR) == 0) {
if (!val.empty()) {
graph_dumps_dir = val;
- if (mkdir(graph_dumps_dir.c_str(), 0755) != 0) {
- THROW_IE_EXCEPTION << "Couldn't create clDNN graph dump directory!";
- }
+ createDirectory(graph_dumps_dir);
+ }
+ } else if (key.compare(PluginConfigParams::KEY_CACHE_DIR) == 0) {
+ if (!val.empty()) {
+ kernels_cache_dir = val;
+ createDirectory(kernels_cache_dir);
}
} else if (key.compare(CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR) == 0) {
if (!val.empty()) {
sources_dumps_dir = val;
- if (mkdir(sources_dumps_dir.c_str(), 0755) != 0) {
- THROW_IE_EXCEPTION << "Couldn't create clDNN source dump directory!";
- }
+ createDirectory(sources_dumps_dir);
}
} else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 0) {
if (val.compare(PluginConfigParams::YES) == 0) {
key_config_map[CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR] = graph_dumps_dir;
key_config_map[CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR] = sources_dumps_dir;
+ key_config_map[PluginConfigParams::KEY_CACHE_DIR] = kernels_cache_dir;
key_config_map[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(throughput_streams);
key_config_map[PluginConfigParams::KEY_DEVICE_ID] = device_id;
tuningConfig(),
graph_dumps_dir(""),
sources_dumps_dir(""),
- device_id("") {
+ device_id(""),
+ kernels_cache_dir("") {
adjustKeyMapValues();
}
std::string graph_dumps_dir;
std::string sources_dumps_dir;
std::string device_id;
+ std::string kernels_cache_dir;
std::map<std::string, std::string> key_config_map;
};
context_config.sources_dumps_dir == current_config.sources_dumps_dir &&
context_config.tuningConfig.mode == current_config.tuningConfig.mode &&
context_config.tuningConfig.cache_file_path == current_config.tuningConfig.cache_file_path &&
+ context_config.kernels_cache_dir == current_config.kernels_cache_dir &&
context_config.device_id == current_config.device_id;
};
m_config.queuePriority,
m_config.queueThrottle,
m_config.memory_pool_on,
- m_config.throughput_streams));
+ m_config.throughput_streams,
+ m_config.kernels_cache_dir));
}
ParamMap CLDNNExecutionContextImpl::getParams() const {
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_test_utils/test_common.hpp"
+#include "common_test_utils/file_utils.hpp"
+#include "common_test_utils/unicode_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "ngraph_functions/subgraph_builders.hpp"
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+// Fixture for GPU compiled-kernels cache tests: builds a small ngraph
+// function (Conv+Pool+Relu) and derives a per-test cache directory name
+// from the current test name so concurrent tests don't collide on disk.
+class CompiledKernelsCacheTest : public CommonTestUtils::TestsCommon {
+protected:
+    // Name of the currently running test; used to build a unique cache dir.
+    std::string test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
+    std::shared_ptr<ngraph::Function> function;
+    std::string cache_path;
+
+    void SetUp() override {
+        function = ngraph::builder::subgraph::makeConvPoolRelu();
+        cache_path = test_name + "_cache";
+    }
+};
+
+// Verifies that setting CACHE_DIR makes the GPU plugin create the cache
+// directory and dump at least one compiled kernel binary (*.cl_cache),
+// and that the directory can be fully cleaned up afterwards.
+TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinaries) {
+    std::shared_ptr<InferenceEngine::Core> ie = PluginCache::get().ie();
+    // Create CNNNetwork from ngraph::Function
+    InferenceEngine::CNNNetwork cnnNet(function);
+    std::map<std::string, std::string> config = {{ CONFIG_KEY(CACHE_DIR), cache_path }};
+    try {
+        // Load CNNNetwork to target plugins
+        auto execNet = ie->LoadNetwork(cnnNet, "GPU", config);
+
+        // Check that directory with cached kernels exists after loading network
+        ASSERT_TRUE(CommonTestUtils::directoryExists(cache_path)) << "Directory with cached kernels doesn't exist";
+        // Check that folder contains cache files and remove them
+        ASSERT_GT(CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache"), 0);
+        // Remove directory and check that it doesn't exist anymore
+        ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0);
+        ASSERT_FALSE(CommonTestUtils::directoryExists(cache_path));
+    } catch (std::exception& ex) {
+        // Cleanup in case of any exception
+        if (CommonTestUtils::directoryExists(cache_path)) {
+            ASSERT_GE(CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache"), 0);
+            ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0);
+        }
+        FAIL() << ex.what() << std::endl;
+    }
+}
+
+#ifdef ENABLE_UNICODE_PATH_SUPPORT
+
+// Same as CanCreateCacheDirAndDumpBinaries but iterates over a set of
+// unicode directory-name postfixes to verify that CACHE_DIR handles
+// non-ASCII paths end to end (create, dump, enumerate, remove).
+TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinariesUnicodePath) {
+    std::shared_ptr<InferenceEngine::Core> ie = PluginCache::get().ie();
+    // Create CNNNetwork from ngraph::Function
+    InferenceEngine::CNNNetwork cnnNet(function);
+    for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) {
+        std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex];
+        std::wstring cache_path_w = CommonTestUtils::addUnicodePostfixToPath(cache_path, postfix);
+
+        try {
+            // The plugin config is narrow-string based, so convert back to MBCS.
+            auto cache_path_mb = FileUtils::wStringtoMBCSstringChar(cache_path_w);
+            std::map<std::string, std::string> config = {{ CONFIG_KEY(CACHE_DIR), cache_path_mb }};
+            // Load CNNNetwork to target plugins
+            auto execNet = ie->LoadNetwork(cnnNet, "GPU", config);
+
+            // Check that directory with cached kernels exists after loading network
+            ASSERT_TRUE(CommonTestUtils::directoryExists(cache_path_w)) << "Directory with cached kernels doesn't exist";
+            // Check that folder contains cache files and remove them
+            ASSERT_GT(CommonTestUtils::removeFilesWithExt(cache_path_w, L"cl_cache"), 0);
+            // Remove directory and check that it doesn't exist anymore
+            ASSERT_EQ(CommonTestUtils::removeDir(cache_path_w), 0);
+            ASSERT_FALSE(CommonTestUtils::directoryExists(cache_path_w));
+        } catch (std::exception& ex) {
+            // Cleanup in case of any exception
+            if (CommonTestUtils::directoryExists(cache_path_w)) {
+                ASSERT_GE(CommonTestUtils::removeFilesWithExt(cache_path_w, L"cl_cache"), 0);
+                ASSERT_EQ(CommonTestUtils::removeDir(cache_path_w), 0);
+            }
+            FAIL() << ex.what() << std::endl;
+        }
+    }
+}
+
+#endif // ENABLE_UNICODE_PATH_SUPPORT
Params params[] = {
std::tuple<Device, Config>{ CommonTestUtils::DEVICE_GPU, { { CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES) }}},
std::tuple<Device, Config>{ CommonTestUtils::DEVICE_GPU, { { CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(NO) }}},
+ std::tuple<Device, Config>{ CommonTestUtils::DEVICE_GPU, { { CONFIG_KEY(CACHE_DIR), "cache" }}},
};
} // namespace
#include <fstream>
#include <string>
#include <vector>
+#include <sys/stat.h>
#include "test_constants.hpp"
+#include "w_dirent.h"
+#include "common_utils.hpp"
+
+#ifdef _WIN32
+#include <direct.h>
+#define rmdir(dir) _rmdir(dir)
+#else // _WIN32
+#include <unistd.h>
+#endif // _WIN32
namespace CommonTestUtils {
std::remove(binFileName.c_str());
}
}
+
+// Removes all files with extension=ext from the given directory
+// Return value:
+// < 0 - error
+// >= 0 - count of removed files
+inline int removeFilesWithExt(std::string path, std::string ext) {
+    struct dirent *ent;
+    DIR *dir = opendir(path.c_str());
+    int ret = 0;
+    if (dir != nullptr) {
+        while ((ent = readdir(dir)) != NULL) {
+            auto file = makePath(path, std::string(ent->d_name));
+            struct stat stat_path;
+            // Check the stat() result: on failure stat_path is uninitialized
+            // and reading st_mode would be undefined behavior. Skip entries
+            // we cannot stat and sub-directories; only plain files ending
+            // with ".<ext>" are removed.
+            if (stat(file.c_str(), &stat_path) != 0 || S_ISDIR(stat_path.st_mode))
+                continue;
+            if (endsWith(file, "." + ext)) {
+                auto err = std::remove(file.c_str());
+                if (err != 0) {
+                    closedir(dir);
+                    return err;
+                }
+                ret++;
+            }
+        }
+        closedir(dir);
+    }
+
+    return ret;
+}
+
+// Removes an (empty) directory. Thin wrapper over rmdir (_rmdir on Windows,
+// see the #define above). Returns 0 on success, non-zero otherwise.
+inline int removeDir(const std::string &path) {
+    return rmdir(path.c_str());
+}
+
+// Returns true iff |path| exists and is a directory.
+inline bool directoryExists(const std::string &path) {
+    struct stat sb;
+
+    if (stat(path.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
+        return true;
+    }
+
+    return false;
+}
+
} // namespace CommonTestUtils
#include <algorithm>
#include <file_utils.h>
+#include "common_utils.hpp"
+#include "w_dirent.h"
#ifdef ENABLE_UNICODE_PATH_SUPPORT
namespace CommonTestUtils {
}
}
+// Wide-string overload: returns true iff |source| ends with |expectedSuffix|.
+inline bool endsWith(const std::wstring& source, const std::wstring& expectedSuffix) {
+    return expectedSuffix.size() <= source.size() && source.compare(source.size() - expectedSuffix.size(), expectedSuffix.size(), expectedSuffix) == 0;
+}
+
+// Removes all files with extension=ext from the given directory
+// Return value:
+// < 0 - error
+// >= 0 - count of removed files
+inline int removeFilesWithExt(std::wstring path, std::wstring ext) {
+    int ret = 0;
+#ifdef _WIN32
+    struct _wdirent *ent;
+    _WDIR *dir = _wopendir(path.c_str());
+    if (dir != nullptr) {
+        while ((ent = _wreaddir(dir)) != NULL) {
+            auto file = ::FileUtils::makePath(path, std::wstring(ent->wd_name));
+            struct _stat64i32 stat_path;
+            // Check the _wstat() result: on failure stat_path is
+            // uninitialized and reading st_mode would be undefined behavior.
+            if (_wstat(file.c_str(), &stat_path) != 0 || S_ISDIR(stat_path.st_mode))
+                continue;
+            if (endsWith(file, L"." + ext)) {
+                auto err = _wremove(file.c_str());
+                if (err != 0) {
+                    _wclosedir(dir);
+                    return err;
+                }
+                ret++;
+            }
+        }
+        _wclosedir(dir);
+    }
+#else
+    struct dirent *ent;
+    auto path_mb = FileUtils::wStringtoMBCSstringChar(path);
+    auto ext_mb = FileUtils::wStringtoMBCSstringChar(ext);
+    DIR *dir = opendir(path_mb.c_str());
+    if (dir != nullptr) {
+        while ((ent = readdir(dir)) != NULL) {
+            std::string file = ::FileUtils::makePath(path_mb, std::string(ent->d_name));
+            struct stat stat_path;
+            // Same stat-failure guard as the Windows branch above.
+            if (stat(file.c_str(), &stat_path) != 0 || S_ISDIR(stat_path.st_mode))
+                continue;
+            if (::CommonTestUtils::endsWith(file, "." + ext_mb)) {
+                auto err = std::remove(file.c_str());
+                if (err != 0) {
+                    closedir(dir);
+                    return err;
+                }
+                ret++;
+            }
+        }
+        closedir(dir);
+    }
+#endif
+    return ret;
+}
+
+// Removes an (empty) directory given as a wide-character path.
+// Returns 0 on success or if |path| is empty, non-zero otherwise.
+// Declared 'inline' (not 'static') for consistency with the other helpers
+// in this header and to avoid an unused per-TU copy of the function.
+inline int removeDir(std::wstring path) {
+    int result = 0;
+    if (!path.empty()) {
+#ifdef _WIN32
+        result = _wrmdir(path.c_str());
+#else
+        result = rmdir(FileUtils::wStringtoMBCSstringChar(path).c_str());
+#endif
+    }
+    return result;
+}
+
+// Wide-path overload: returns true iff |path| exists and is a directory.
+// On non-Windows platforms the wide path is converted to MBCS first.
+inline bool directoryExists(const std::wstring &path) {
+#ifdef _WIN32
+    struct _stat64i32 sb;
+    if (_wstat(path.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
+        return true;
+    }
+#else
+    struct stat sb;
+    if (stat(FileUtils::wStringtoMBCSstringChar(path).c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
+        return true;
+    }
+#endif
+
+    return false;
+}
+
static const std::vector<std::wstring> test_unicode_postfix_vector = {
L"unicode_Яㅎあ",
L"ひらがな日本語",
};
} // namespace CommonTestUtils
-#endif // ENABLE_UNICODE_PATH_SUPPORT
\ No newline at end of file
+#endif // ENABLE_UNICODE_PATH_SUPPORT
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#if defined(_WIN32)
+
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN_UNDEF
+#endif
+
+#ifndef NOMINMAX
+# define NOMINMAX
+# define NOMINMAX_UNDEF
+#endif
+
+#if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_)
+# define _X86_
+#endif
+
+#if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_)
+# define _AMD64_
+#endif
+
+#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM_
+#endif
+
+#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM64_
+#endif
+
+#include <string>
+#include <windef.h>
+#include <fileapi.h>
+#include <Winbase.h>
+#include <sys/stat.h>
+
+// Copied from linux libc sys/stat.h:
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+
+// POSIX-like dirent shim for Windows; owns a narrow (multibyte) copy of the
+// file name in d_name.
+struct dirent {
+    char *d_name;
+
+    explicit dirent(const wchar_t *wsFilePath) {
+        // Query the exact narrow-buffer size first: a wide character may
+        // expand to several bytes, so the previous wcslen() + 1 allocation
+        // could be too small for non-ASCII names.
+        size_t requiredSize = 0;
+        if (wcstombs_s(&requiredSize, nullptr, 0, wsFilePath, 0) != 0 || requiredSize == 0) {
+            d_name = nullptr;
+            return;
+        }
+        d_name = static_cast<char *>(malloc(requiredSize));
+        if (d_name != nullptr) {
+            size_t converted = 0;
+            wcstombs_s(&converted, d_name, requiredSize, wsFilePath, requiredSize - 1);
+        }
+    }
+    ~dirent() {
+        free(d_name);
+    }
+};
+
+// Minimal readdir-style wrapper over the Win32 FindFirstFileA/FindNextFileA
+// API. FindFileData.dwReserved0 is reused as a "more entries available" flag.
+class DIR {
+    WIN32_FIND_DATAA FindFileData;
+    HANDLE hFind;
+    dirent *next;
+
+    static inline bool endsWith(const std::string &src, const char *with) {
+        int wl = static_cast<int>(strlen(with));
+        int so = static_cast<int>(src.length()) - wl;
+        if (so < 0) return false;
+        return 0 == strncmp(with, &src[so], wl);
+    }
+
+public:
+    DIR(const DIR &other) = delete;
+    DIR(DIR &&other) = delete;
+    DIR& operator=(const DIR &other) = delete;
+    DIR& operator=(DIR &&other) = delete;
+
+    explicit DIR(const char *dirPath) : next(nullptr) {
+        std::string ws = dirPath;
+        if (endsWith(ws, "\\"))
+            ws += "*";
+        else
+            ws += "\\*";
+        hFind = FindFirstFileA(ws.c_str(), &FindFileData);
+        FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
+    }
+
+    ~DIR() {
+        // Fix: the previous "if (!next) delete next;" only ran when next was
+        // already null, leaking the last returned entry. delete on nullptr
+        // is a safe no-op, so delete unconditionally.
+        delete next;
+        next = nullptr;
+        FindClose(hFind);
+    }
+
+    bool isValid() const {
+        return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
+    }
+
+    // Returns the next entry or nullptr when exhausted. The returned pointer
+    // is owned by this DIR and invalidated by the next nextEnt() call.
+    dirent* nextEnt() {
+        if (next != nullptr) delete next;
+        next = nullptr;
+
+        if (!FindFileData.dwReserved0) return nullptr;
+
+        wchar_t wbuf[4096];
+
+        size_t outSize;
+        mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094);
+        next = new dirent(wbuf);
+        FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData);
+        return next;
+    }
+};
+
+// Wide-character dirent shim; owns a copy of the wide file name in wd_name.
+struct _wdirent {
+    wchar_t *wd_name;
+
+    explicit _wdirent(const wchar_t *wsFilePath) {
+        auto slen = wcslen(wsFilePath);
+        // NOTE(review): malloc result is not checked before wcscpy_s;
+        // on allocation failure this would fault. Acceptable for test-utils
+        // code, but worth confirming against project error-handling policy.
+        wd_name = static_cast<wchar_t *>(malloc(sizeof(wchar_t) * (slen + 1)));
+        wcscpy_s(wd_name, slen + 1, wsFilePath);
+    }
+    ~_wdirent() {
+        free(wd_name);
+    }
+};
+
+// Wide-character counterpart of DIR above, built on FindFirstFileW /
+// FindNextFileW. dwReserved0 is reused as a "more entries available" flag.
+class _WDIR {
+    WIN32_FIND_DATAW FindFileData;
+    HANDLE hFind;
+    _wdirent *next;
+
+    static inline bool endsWith(const std::wstring &src, const wchar_t *with) {
+        int wl = static_cast<int>(wcslen(with));
+        int so = static_cast<int>(src.length()) - wl;
+        if (so < 0) return false;
+        return 0 == wcsncmp(with, &src[so], wl);
+    }
+
+public:
+    _WDIR(const _WDIR &other) = delete;
+    _WDIR(_WDIR &&other) = delete;
+    _WDIR& operator=(const _WDIR &other) = delete;
+    _WDIR& operator=(_WDIR &&other) = delete;
+
+    explicit _WDIR(const wchar_t *dirPath) : next(nullptr) {
+        std::wstring ws = dirPath;
+        if (endsWith(ws, L"\\"))
+            ws += L"*";
+        else
+            ws += L"\\*";
+        hFind = FindFirstFileW(ws.c_str(), &FindFileData);
+        FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
+    }
+
+    ~_WDIR() {
+        // Fix: "if (!next) delete next;" was inverted and leaked the last
+        // returned entry. delete on nullptr is a safe no-op.
+        delete next;
+        next = nullptr;
+        FindClose(hFind);
+    }
+
+    bool isValid() const {
+        return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
+    }
+
+    // Returns the next entry or nullptr when exhausted. The returned pointer
+    // is owned by this _WDIR and invalidated by the next nextEnt() call.
+    _wdirent* nextEnt() {
+        if (next != nullptr) delete next;
+        next = nullptr;
+
+        if (!FindFileData.dwReserved0) return nullptr;
+
+        std::wstring buf(FindFileData.cFileName);
+        next = new _wdirent(buf.c_str());
+        FindFileData.dwReserved0 = FindNextFileW(hFind, &FindFileData);
+        return next;
+    }
+};
+
+
+// Free-function shims mirroring the POSIX dirent API (opendir/readdir/
+// closedir) on top of the DIR/_WDIR classes above, plus wide-character
+// variants. Returned DIR/_WDIR pointers must be released with
+// closedir/_wclosedir; entry pointers are owned by the DIR object.
+static DIR* opendir(const char *dirPath) {
+    auto dp = new DIR(dirPath);
+    if (!dp->isValid()) {
+        delete dp;
+        return nullptr;
+    }
+    return dp;
+}
+
+static _WDIR* _wopendir(const wchar_t *dirPath) {
+    auto dp = new _WDIR(dirPath);
+    if (!dp->isValid()) {
+        delete dp;
+        return nullptr;
+    }
+    return dp;
+}
+
+// Returns nullptr when the listing is exhausted; the pointer is invalidated
+// by the next readdir/_wreaddir call or by closing the directory.
+static struct dirent* readdir(DIR *dp) {
+    return dp->nextEnt();
+}
+
+static struct _wdirent* _wreaddir(_WDIR *dp) {
+    return dp->nextEnt();
+}
+
+static void closedir(DIR *dp) {
+    delete dp;
+}
+
+static void _wclosedir(_WDIR *dp) {
+    delete dp;
+}
+
+#ifdef WIN32_LEAN_AND_MEAN_UNDEF
+# undef WIN32_LEAN_AND_MEAN
+# undef WIN32_LEAN_AND_MEAN_UNDEF
+#endif
+
+#ifdef NOMINMAX_UNDEF
+# undef NOMINMAX_UNDEF
+# undef NOMINMAX
+#endif
+
+#else
+
+#include <sys/types.h>
+#include <dirent.h>
+
+#endif
/*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
const throttle_mode_types throttle_mode; ///< Throttle mode (support of throttle hints in command queue). If cl_khr_throttle_hints extension
///< is not supported by current OpenCL implementation, the value must be set to cldnn_throttle_disabled.
- bool enable_memory_pool; ///< Enables memory usage optimization. memory objects will be reused when possible
- ///< (switched off for older drivers then NEO).
- uint16_t n_streams; ///< Number of queues executed in parallel
- const std::string tuning_cache_path; ///< Path to tuning kernel cache
+ bool enable_memory_pool; ///< Enables memory usage optimization. memory objects will be reused when possible
+                                               ///< (switched off for older drivers than NEO).
+ uint16_t n_streams; ///< Number of queues executed in parallel
+ const std::string kernels_cache_path; ///< Path to compiled kernels cache
+ const std::string tuning_cache_path; ///< Path to tuning kernel cache
/// @brief Constructs engine configuration with specified options.
/// @param profiling Enable per-primitive profiling.
throttle_mode_types throttle_mode = throttle_mode_types::disabled,
bool memory_pool = true,
uint16_t n_streams = 1,
+ const std::string& kernels_cache_path = "",
const std::string& tuning_cache_path = "cache.json")
: enable_profiling(profiling)
, meaningful_kernels_names(decorate_kernel_names)
, throttle_mode(throttle_mode)
, enable_memory_pool(memory_pool)
, n_streams(n_streams)
+ , kernels_cache_path(kernels_cache_path)
, tuning_cache_path(tuning_cache_path) {
if (n_streams == 0) {
throw std::invalid_argument("Invalid streams count set in engine config");
/// @brief Specifies a directory to which stages of network compilation should be dumped. (default: empty, i.e. no dumping)
graph_dumps_dir,
+ /// @brief Specifies a directory to which compiled kernels should be cached or can be loaded from. (default: empty, i.e. no caching)
+ kernels_cache_dir,
/// @brief Name for serialization process
serialize_network,
load_program,
/// @brief Specifies a directory to which stages of network compilation should be dumped (default: empty, i.e. no dumping)
static std::shared_ptr<const build_option> graph_dumps_dir(const std::string& dir_path);
+ /// @brief Specifies a directory to which compiled kernels should be cached or can be loaded from. (default: empty, i.e. no caching)
+ static std::shared_ptr<const build_option> kernels_cache_dir(const std::string& dir_path);
+
/// @brief Specifies a name for serialization process.
static std::shared_ptr<const build_option> serialize_network(const std::string& network_name);
/// @brief Specifies a name of load_program process.
build_option_directory& operator=(const build_option_directory& other) = delete;
};
+/// @brief @ref build_option specialization for selecting a directory.
+template <build_option_type OptType>
+struct build_option_kernels_cache_dir : build_option {
+ const std::string directory_path;
+
+ explicit build_option_kernels_cache_dir(const std::string& dir_path) : directory_path(dir_path) {}
+
+private:
+ /// @brief Returns build_option_type::kernels_cache_dir.
+ build_option_type get_type() const override { return build_option_type::kernels_cache_dir; }
+
+ build_option_kernels_cache_dir(const build_option_kernels_cache_dir& other) = delete;
+ build_option_kernels_cache_dir& operator=(const build_option_kernels_cache_dir& other) = delete;
+};
+
/// @brief @ref build_option specialization for serialization process.
template <build_option_type OptType>
struct build_option_serialization : build_option {
static std::shared_ptr<const build_option> make_default() { return build_option::graph_dumps_dir({}); }
};
template <>
+struct build_option_traits<build_option_type::kernels_cache_dir> {
+ typedef build_option_directory<build_option_type::kernels_cache_dir> object_type;
+ static std::shared_ptr<const build_option> make_default() { return build_option::kernels_cache_dir({}); }
+};
+template <>
struct build_option_traits<build_option_type::serialize_network> {
typedef build_option_serialization<build_option_type::serialize_network> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::serialize_network({}); }
inline std::shared_ptr<const build_option> build_option::graph_dumps_dir(const std::string& dir_path) {
return std::make_shared<build_option_directory<build_option_type::graph_dumps_dir>>(dir_path);
}
+
+inline std::shared_ptr<const build_option> build_option::kernels_cache_dir(const std::string& dir_path) {
+ return std::make_shared<build_option_directory<build_option_type::kernels_cache_dir>>(dir_path);
+}
inline std::shared_ptr<const build_option> build_option::serialize_network(const std::string& name) {
return std::make_shared<build_option_serialization<build_option_type::serialize_network>>(name);
}
result.priority_mode = conf.priority_mode;
result.throttle_mode = conf.throttle_mode;
result.queues_num = conf.n_streams;
+ result.kernels_cache_path = conf.kernels_cache_path;
result.tuning_cache_path = conf.tuning_cache_path;
return result;
}
priority_mode(priority_mode_types::disabled),
throttle_mode(throttle_mode_types::disabled),
queues_num(0),
- tuning_cache_path("cache.json") {}
+ tuning_cache_path("cache.json"),
+ kernels_cache_path("") {}
} // namespace gpu
} // namespace cldnn
throttle_mode_types throttle_mode;
uint16_t queues_num;
std::string tuning_cache_path;
+ std::string kernels_cache_path;
};
} // namespace gpu
} // namespace cldnn
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
#include "kernel_selector_helper.h"
-#define MAX_KERNELS_PER_PROGRAM 10
-
-namespace cldnn {
-namespace gpu {
+#ifndef ENABLE_UNICODE_PATH_SUPPORT
+# ifdef _WIN32
+# if defined __INTEL_COMPILER || defined _MSC_VER
+# define ENABLE_UNICODE_PATH_SUPPORT
+# endif
+# elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2)) || defined(__clang__)
+# define ENABLE_UNICODE_PATH_SUPPORT
+# endif
+#endif
+
+#ifndef _WIN32
+#ifdef ENABLE_UNICODE_PATH_SUPPORT
+#include <locale>
+#include <codecvt>
+#endif
+#else
+#include <Windows.h>
+#endif
namespace {
-std::string get_undef_jit(kernels_cache::source_code org_source_code) {
+
+std::mutex cacheAccessMutex;
+
+#ifdef ENABLE_UNICODE_PATH_SUPPORT
+// Converts a UTF-8 narrow string to a wide string.
+// Windows: MultiByteToWideChar with CP_UTF8.
+// Elsewhere: std::codecvt_utf8 facet (deprecated in C++17 but still
+// functional; kept for parity with FileUtils).
+std::wstring multiByteCharToWString(const char* str) {
+#ifdef _WIN32
+    int strSize = static_cast<int>(std::strlen(str));
+    // First call computes the required length, second performs the conversion.
+    int size_needed = MultiByteToWideChar(CP_UTF8, 0, str, strSize, NULL, 0);
+    std::wstring wstrTo(size_needed, 0);
+    MultiByteToWideChar(CP_UTF8, 0, str, strSize, &wstrTo[0], size_needed);
+    return wstrTo;
+#else
+    std::wstring_convert<std::codecvt_utf8<wchar_t>> wstring_encoder;
+    std::wstring result = wstring_encoder.from_bytes(str);
+    return result;
+#endif // _WIN32
+}
+#endif // ENABLE_UNICODE_PATH_SUPPORT
+
+// Reads the whole file at |path| into a byte vector, serialized by
+// cacheAccessMutex so concurrent compilations don't interleave cache I/O.
+// Returns an empty vector if the file cannot be opened or sized (treated
+// by callers as "no cache entry").
+static std::vector<unsigned char> loadBinaryFromFile(std::string path) {
+    std::lock_guard<std::mutex> lock(cacheAccessMutex);
+
+#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+    std::wstring widefilename = multiByteCharToWString(path.c_str());
+    const wchar_t* filename = widefilename.c_str();
+    FILE *fp = _wfopen(filename, L"rb");
+#else
+    const char* filename = path.c_str();
+    FILE *fp = fopen(filename, "rb");
+#endif
+
+    if (fp) {
+        fseek(fp, 0, SEEK_END);
+        long file_size = ftell(fp);
+        // ftell returns -1 on failure; the previous (size_t) cast turned
+        // that into a huge allocation. Treat it as a cache miss instead.
+        if (file_size < 0) {
+            fclose(fp);
+            return {};
+        }
+        size_t nsize = static_cast<size_t>(file_size);
+
+        fseek(fp, 0, SEEK_SET);
+
+        std::vector<unsigned char> ret(nsize);
+
+        auto res = fread(ret.data(), sizeof(unsigned char), nsize, fp);
+        (void)res;
+        fclose(fp);
+        return ret;
+    }
+
+    return {};
+}
+
+// Writes |buffer| to |path| in binary mode, serialized by cacheAccessMutex.
+// Best effort: open failures are silently ignored, matching the cache's
+// opportunistic behavior.
+static void saveBinaryToFile(std::string path, const std::vector<unsigned char> buffer) {
+    std::lock_guard<std::mutex> lock(cacheAccessMutex);
+#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+    std::wstring widefilename = multiByteCharToWString(path.c_str());
+    const wchar_t* filename = widefilename.c_str();
+#else
+    const char* filename = path.c_str();
+#endif
+    std::ofstream out_file(filename, std::ios::out | std::ios::binary);
+    // buffer.data() instead of &buffer[0]: operator[] on an empty vector is
+    // undefined behavior, and an empty write is pointless anyway.
+    if (out_file.is_open() && !buffer.empty()) {
+        out_file.write(reinterpret_cast<const char*>(buffer.data()), buffer.size());
+    }
+}
+
+std::string get_undef_jit(cldnn::gpu::kernels_cache::source_code org_source_code) {
const std::string white_space_with_new_lines = " \t\r\n";
const std::string white_space = " \t";
inline bool does_options_support_batch_compilation(const std::string& options) {
return options.find("-D") == std::string::npos && options.find("-I") == std::string::npos;
}
+
} // namespace
+namespace cldnn {
+namespace gpu {
+
+// Returns the configured kernels-cache directory with a guaranteed trailing
+// separator, or an empty string when caching is not configured.
+std::string kernels_cache::get_cache_path() const {
+    auto path = _context.get_configuration().kernels_cache_path;
+    if (path.empty()) {
+        return {};
+    }
+
+    // Normalize: ensure the path ends with a separator so callers can
+    // append file names directly. back() is safe - path is non-empty here.
+    if (path.back() != '/' && path.back() != '\\') {
+        path += "/";
+    }
+    return path;
+}
+
+// Caching is enabled iff a non-empty kernels_cache_path was configured
+// (set from the CACHE_DIR plugin config key).
+bool kernels_cache::is_cache_enabled() const {
+    return !_context.get_configuration().kernels_cache_path.empty();
+}
+
+// Maximum number of kernels compiled (and cached) together as one bucket.
+// Trade-off: smaller buckets improve cross-model cache reuse but increase
+// compile time (forcing 1 makes compilation much longer).
+size_t kernels_cache::get_max_kernels_per_batch() const {
+    return 10;
+}
+
kernels_cache::sorted_code kernels_cache::get_program_source(const kernels_code& kernels_source_code) const {
sorted_code scode;
for (const auto& code : kernels_source_code) {
- const source_code org_source_code = {code.kernel_strings->jit, code.kernel_strings->str};
+ std::string full_code = code.kernel_strings->jit + code.kernel_strings->str;
+ full_code += get_undef_jit({full_code});
+ const source_code org_source_code = { full_code };
std::string entry_point = code.kernel_strings->entry_point;
std::string options = code.kernel_strings->options;
bool batch_compilation = code.kernel_strings->batch_compilation;
current_bucket.options = options;
}
- if ((current_bucket.kernels_counter % MAX_KERNELS_PER_PROGRAM) == 0) {
+ // Create new kernels bucket when the limit is reached
+ if ((current_bucket.kernels_counter % get_max_kernels_per_batch()) == 0) {
current_bucket.source.push_back({});
}
current_bucket.entry_point_to_id[entry_point] = code.id;
+ assert(org_source_code.size() == 1);
- source_code new_source_code = org_source_code;
+ current_bucket.source.back().push_back(std::move(org_source_code.front()));
- if (batch_compilation) {
- new_source_code.push_back(get_undef_jit(org_source_code));
- }
+ current_bucket.kernels_counter++;
+ }
- for (auto& s : new_source_code) {
- current_bucket.source.back().push_back(std::move(s));
+ // Compute hash value for each bucket
+ // Hash calculation might require additional optimizations, but currently execution time of this part is much smaller than loading
+ // of the precompiled binaries or get_undef_jit calls
+ // Hash is computed for string that contains compilation options + driver version +
+ // full source code (jit + template + undef sections) of all kernels in the bucket
+ for (auto& c : scode) {
+ program_code& code = c.second;
+ auto options = c.first;
+ for (size_t i = 0; i < code.source.size(); i++) {
+ std::string full_code = options + " " + _context.get_device_info().driver_version;
+ for (auto& ss : code.source[i])
+ full_code += ss;
+ code.hash_values.push_back(std::hash<std::string>()(full_code));
}
-
- current_bucket.kernels_counter++;
}
return scode;
return id;
}
+// Extracts the compiled program binary for the (single) target device via
+// CL_PROGRAM_BINARY_SIZES / CL_PROGRAM_BINARIES.
+// Throws std::runtime_error if the program was built for a number of
+// devices other than one, or if no binary is available.
+static std::vector<unsigned char> getProgramBinaries(cl::Program program) {
+    // Get the size of the program binary in bytes.
+    std::vector<size_t> binary_sizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>();
+
+    if (binary_sizes.size() != 1)
+        throw std::runtime_error("Invalid binaries count");
+
+    size_t binary_size = binary_sizes.front();
+    // Binary is not available for the device.
+    if (binary_size == 0)
+        throw std::runtime_error("Binary is not available after program build");
+
+    // Get program binary.
+    return program.getInfo<CL_PROGRAM_BINARIES>().front();
+}
+
kernels_cache::kernels_map kernels_cache::build_program(const program_code& program_source) const {
static uint32_t current_file_index = 0;
- bool dump_sources =
- !_context.get_configuration().ocl_sources_dumps_dir.empty() || program_source.dump_custom_program;
+ bool dump_sources = !_context.get_configuration().ocl_sources_dumps_dir.empty() || program_source.dump_custom_program;
std::string dump_file_name = "";
if (dump_sources) {
// failed to compile)
uint32_t part_idx = 0;
- for (const auto& sources : program_source.source) {
+ for (size_t i = 0; i < program_source.source.size(); i++) {
+ auto sources_bucket_to_compile = program_source.source[i];
+ const auto& hash_value = program_source.hash_values[i];
+ std::string cached_bin_name = get_cache_path() + std::to_string(hash_value) + ".cl_cache";
+ cl::Program::Binaries precompiled_kernels = {};
+ if (is_cache_enabled()) {
+ // Try to load file with name ${hash_value}.cl_cache which contains precompiled kernels for current bucket
+ // If read is successful, then remove kernels from compilation bucket
+ auto bin = loadBinaryFromFile(cached_bin_name);
+ if (!bin.empty()) {
+ precompiled_kernels.push_back(bin);
+ }
+ }
auto current_dump_file_name = dump_file_name + std::to_string(part_idx++) + ".cl";
std::ofstream dump_file;
dump_file.open(current_dump_file_name);
if (dump_file.good()) {
- for (auto& s : sources) dump_file << s;
+ for (auto& s : sources_bucket_to_compile)
+ dump_file << s;
}
}
try {
- cl::Program program(_context.context(), sources);
- program.build({_context.device()}, program_source.options.c_str());
+ cl::vector<cl::Kernel> kernels;
+ // Run compilation
+ if (precompiled_kernels.empty()) {
+ cl::Program program(_context.context(), sources_bucket_to_compile);
+ program.build({_context.device()}, program_source.options.c_str());
- if (dump_sources && dump_file.good()) {
- dump_file << "\n/* Build Log:\n";
- for (auto& p : program.getBuildInfo<CL_PROGRAM_BUILD_LOG>()) dump_file << p.second << "\n";
+ if (dump_sources && dump_file.good()) {
+ dump_file << "\n/* Build Log:\n";
+ for (auto& p : program.getBuildInfo<CL_PROGRAM_BUILD_LOG>())
+ dump_file << p.second << "\n";
- dump_file << "*/\n";
- }
+ dump_file << "*/\n";
+ }
- cl::vector<cl::Kernel> kernels;
- program.createKernels(&kernels);
+ program.createKernels(&kernels);
+ if (is_cache_enabled()) {
+ // If kernels caching is enabled, then we save compiled bucket to binary file with name ${code_hash_value}.cl_cache
+ // Note: Bin file contains full bucket, not separate kernels, so kernels reuse across different models is quite limited
+ // Bucket size can be changed in get_max_kernels_per_batch() method, but forcing it to 1 will lead to much longer
+ // compile time.
+ saveBinaryToFile(cached_bin_name, getProgramBinaries(program));
+ }
+ } else {
+ cl::Program program(_context.context(), {_context.device()}, precompiled_kernels);
+ program.build({_context.device()}, program_source.options.c_str());
+ program.createKernels(&kernels);
+ }
for (auto& k : kernels) {
auto kernel_name = k.getInfo<CL_KERNEL_FUNCTION_NAME>();
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
struct program_code {
std::vector<source_code> source;
+ std::vector<size_t> hash_values;
uint32_t kernels_counter = 0;
std::string options;
bool dump_custom_program = false;
sorted_code get_program_source(const kernels_code& kernels_source_code) const;
kernels_map build_program(const program_code& pcode) const;
+ std::string get_cache_path() const;
+ bool is_cache_enabled() const;
+ size_t get_max_kernels_per_batch() const;
public:
explicit kernels_cache(gpu_toolkit& context, uint32_t prog_id);
kernel_id set_kernel_source(const std::shared_ptr<kernel_selector::kernel_string>& kernel_string,