[IE CLDNN] Plugin-side kernels caching (#2871)
authorVladimir Paramuzov <vladimir.paramuzov@intel.com>
Thu, 5 Nov 2020 20:44:45 +0000 (23:44 +0300)
committerGitHub <noreply@github.com>
Thu, 5 Nov 2020 20:44:45 +0000 (23:44 +0300)
17 files changed:
inference-engine/include/ie_plugin_config.hpp
inference-engine/src/cldnn_engine/cldnn_config.cpp
inference-engine/src/cldnn_engine/cldnn_config.h
inference-engine/src/cldnn_engine/cldnn_engine.cpp
inference-engine/src/cldnn_engine/cldnn_remote_context.cpp
inference-engine/tests/functional/plugin/gpu/behavior/cache.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_threading_tests.cpp
inference-engine/tests/ie_test_utils/common_test_utils/file_utils.hpp
inference-engine/tests/ie_test_utils/common_test_utils/unicode_utils.hpp
inference-engine/tests/ie_test_utils/common_test_utils/w_dirent.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/api/engine.hpp
inference-engine/thirdparty/clDNN/api/program.hpp
inference-engine/thirdparty/clDNN/src/engine.cpp
inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp
inference-engine/thirdparty/clDNN/src/gpu/configuration.h
inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp
inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.h

index a593204..e6175eb 100644 (file)
@@ -362,5 +362,18 @@ DECLARE_CONFIG_KEY(DUMP_EXEC_GRAPH_AS_DOT);
  */
 DECLARE_CONFIG_KEY(ENFORCE_BF16);
 
+/**
+* @brief This key defines the directory which will be used to store any data cached by plugins.
+*
+* This key supports Unicode symbols in the path.
+* The underlying cache structure is not defined and might differ between OpenVINO releases.
+* Cached data might be platform/device specific and might become invalid after an OpenVINO version change.
+* If this key is not specified or its value is an empty string, then caching is disabled.
+* The key might enable caching for all plugins or only for specific ones, e.g.:
+* ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "cache/"}}) - enables cache for all plugins that might want to use it
+* ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "cache/"}}, {"GPU"}) - enables cache only for GPU plugin
+*/
+DECLARE_CONFIG_KEY(CACHE_DIR);
+
 }  // namespace PluginConfigParams
 }  // namespace InferenceEngine
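
A minimal usage sketch of the new key through the public Inference Engine Core API; the model path and the "cache/" directory below are placeholder values, not part of this patch:

    #include <ie_core.hpp>
    #include <ie_plugin_config.hpp>

    int main() {
        InferenceEngine::Core ie;

        // Enable plugin-side caching for the GPU plugin only ("cache/" is an arbitrary directory).
        ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "cache/"}}, "GPU");

        // The key can also be passed per LoadNetwork call ("model.xml" is a placeholder model).
        auto network = ie.ReadNetwork("model.xml");
        auto execNet = ie.LoadNetwork(network, "GPU", {{CONFIG_KEY(CACHE_DIR), "cache/"}});
        return 0;
    }
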
index cd685ed..2309399 100644 (file)
@@ -8,16 +8,36 @@
 #include "cldnn_config.h"
 #include "cpp_interfaces/exception2status.hpp"
 #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
+#include "ie_api.h"
+#include "file_utils.h"
 
 #ifdef _WIN32
 # include <direct.h>
+#ifdef ENABLE_UNICODE_PATH_SUPPORT
+# define mkdir(dir, mode) _wmkdir(dir)
+#else
 # define mkdir(dir, mode) _mkdir(dir)
-#endif
+#endif  // ENABLE_UNICODE_PATH_SUPPORT
+#endif  // _WIN32
 
 using namespace InferenceEngine;
 
 namespace CLDNNPlugin {
 
+static void createDirectory(std::string _path) {
+#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+    std::wstring widepath = FileUtils::multiByteCharToWString(_path.c_str());
+    const wchar_t* path = widepath.c_str();
+#else
+    const char* path = _path.c_str();
+#endif
+
+    auto err = mkdir(path, 0755);
+    if (err != 0 && errno != EEXIST) {
+        THROW_IE_EXCEPTION << "Couldn't create directory! (err=" << err << "; errno=" << errno << ")";
+    }
+}
+
 void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap) {
     for (auto& kvp : configMap) {
         std::string key = kvp.first;
@@ -129,16 +149,17 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
         } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR) == 0) {
             if (!val.empty()) {
                 graph_dumps_dir = val;
-                if (mkdir(graph_dumps_dir.c_str(), 0755) != 0) {
-                    THROW_IE_EXCEPTION << "Couldn't create clDNN graph dump directory!";
-                }
+                createDirectory(graph_dumps_dir);
+            }
+        } else if (key.compare(PluginConfigParams::KEY_CACHE_DIR) == 0) {
+            if (!val.empty()) {
+                kernels_cache_dir = val;
+                createDirectory(kernels_cache_dir);
             }
         } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR) == 0) {
             if (!val.empty()) {
                 sources_dumps_dir = val;
-                if (mkdir(sources_dumps_dir.c_str(), 0755) != 0) {
-                    THROW_IE_EXCEPTION << "Couldn't create clDNN source dump directory!";
-                }
+                createDirectory(sources_dumps_dir);
             }
         } else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 0) {
             if (val.compare(PluginConfigParams::YES) == 0) {
@@ -276,6 +297,7 @@ void Config::adjustKeyMapValues() {
 
     key_config_map[CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR] = graph_dumps_dir;
     key_config_map[CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR] = sources_dumps_dir;
+    key_config_map[PluginConfigParams::KEY_CACHE_DIR] = kernels_cache_dir;
 
     key_config_map[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(throughput_streams);
     key_config_map[PluginConfigParams::KEY_DEVICE_ID] = device_id;
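
Since the key is now registered in key_config_map, the value can be queried back through the regular config path; a small sketch, assuming the value was previously set for the GPU device:

    #include <ie_core.hpp>
    #include <ie_plugin_config.hpp>
    #include <string>

    std::string queryGpuCacheDir() {
        InferenceEngine::Core ie;
        ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "cache/"}}, "GPU");
        // The GPU plugin now reports the stored kernels_cache_dir value for this key.
        return ie.GetConfig("GPU", CONFIG_KEY(CACHE_DIR)).as<std::string>();
    }
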
index 8bc782e..9abf639 100644 (file)
@@ -35,7 +35,8 @@ struct Config {
                tuningConfig(),
                graph_dumps_dir(""),
                sources_dumps_dir(""),
-               device_id("") {
+               device_id(""),
+               kernels_cache_dir("") {
         adjustKeyMapValues();
     }
 
@@ -59,6 +60,7 @@ struct Config {
     std::string graph_dumps_dir;
     std::string sources_dumps_dir;
     std::string device_id;
+    std::string kernels_cache_dir;
 
     std::map<std::string, std::string> key_config_map;
 };
index 222f78b..c1c5215 100644 (file)
@@ -312,6 +312,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
                context_config.sources_dumps_dir == current_config.sources_dumps_dir &&
                context_config.tuningConfig.mode == current_config.tuningConfig.mode &&
                context_config.tuningConfig.cache_file_path == current_config.tuningConfig.cache_file_path &&
+               context_config.kernels_cache_dir == current_config.kernels_cache_dir &&
                context_config.device_id == current_config.device_id;
     };
 
index 3ff5289..c8d4ceb 100644 (file)
@@ -262,7 +262,8 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
             m_config.queuePriority,
             m_config.queueThrottle,
             m_config.memory_pool_on,
-            m_config.throughput_streams));
+            m_config.throughput_streams,
+            m_config.kernels_cache_dir));
 }
 
 ParamMap CLDNNExecutionContextImpl::getParams() const {
diff --git a/inference-engine/tests/functional/plugin/gpu/behavior/cache.cpp b/inference-engine/tests/functional/plugin/gpu/behavior/cache.cpp
new file mode 100644 (file)
index 0000000..7140bd9
--- /dev/null
@@ -0,0 +1,86 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_test_utils/test_common.hpp"
+#include "common_test_utils/file_utils.hpp"
+#include "common_test_utils/unicode_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "ngraph_functions/subgraph_builders.hpp"
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+class CompiledKernelsCacheTest : public CommonTestUtils::TestsCommon {
+protected:
+    std::string test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
+    std::shared_ptr<ngraph::Function> function;
+    std::string cache_path;
+
+    void SetUp() override {
+        function = ngraph::builder::subgraph::makeConvPoolRelu();
+        cache_path = test_name + "_cache";
+    }
+};
+
+TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinaries) {
+    std::shared_ptr<InferenceEngine::Core> ie = PluginCache::get().ie();
+    // Create CNNNetwork from ngraph::Function
+    InferenceEngine::CNNNetwork cnnNet(function);
+    std::map<std::string, std::string> config = {{ CONFIG_KEY(CACHE_DIR), cache_path }};
+    try {
+        // Load CNNNetwork to target plugins
+        auto execNet = ie->LoadNetwork(cnnNet, "GPU", config);
+
+        // Check that directory with cached kernels exists after loading network
+        ASSERT_TRUE(CommonTestUtils::directoryExists(cache_path)) << "Directory with cached kernels doesn't exist";
+        // Check that folder contains cache files and remove them
+        ASSERT_GT(CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache"), 0);
+        // Remove directory and check that it doesn't exist anymore
+        ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0);
+        ASSERT_FALSE(CommonTestUtils::directoryExists(cache_path));
+    } catch (std::exception& ex) {
+        // Cleanup in case of any exception
+        if (CommonTestUtils::directoryExists(cache_path)) {
+            ASSERT_GE(CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache"), 0);
+            ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0);
+        }
+        FAIL() << ex.what() << std::endl;
+    }
+}
+
+#ifdef ENABLE_UNICODE_PATH_SUPPORT
+
+TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinariesUnicodePath) {
+    std::shared_ptr<InferenceEngine::Core> ie = PluginCache::get().ie();
+    // Create CNNNetwork from ngraph::Function
+    InferenceEngine::CNNNetwork cnnNet(function);
+    for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) {
+        std::wstring postfix  = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex];
+        std::wstring cache_path_w = CommonTestUtils::addUnicodePostfixToPath(cache_path, postfix);
+
+        try {
+            auto cache_path_mb = FileUtils::wStringtoMBCSstringChar(cache_path_w);
+            std::map<std::string, std::string> config = {{ CONFIG_KEY(CACHE_DIR), cache_path_mb }};
+            // Load CNNNetwork to target plugins
+            auto execNet = ie->LoadNetwork(cnnNet, "GPU", config);
+
+            // Check that directory with cached kernels exists after loading network
+            ASSERT_TRUE(CommonTestUtils::directoryExists(cache_path_w)) << "Directory with cached kernels doesn't exist";
+            // Check that folder contains cache files and remove them
+            ASSERT_GT(CommonTestUtils::removeFilesWithExt(cache_path_w, L"cl_cache"), 0);
+            // Remove directory and check that it doesn't exist anymore
+            ASSERT_EQ(CommonTestUtils::removeDir(cache_path_w), 0);
+            ASSERT_FALSE(CommonTestUtils::directoryExists(cache_path_w));
+        } catch (std::exception& ex) {
+            // Cleanup in case of any exception
+            if (CommonTestUtils::directoryExists(cache_path_w)) {
+                ASSERT_GE(CommonTestUtils::removeFilesWithExt(cache_path_w, L"cl_cache"), 0);
+                ASSERT_EQ(CommonTestUtils::removeDir(cache_path_w), 0);
+            }
+            FAIL() << ex.what() << std::endl;
+        }
+    }
+}
+
+#endif  // ENABLE_UNICODE_PATH_SUPPORT
index 8e6a2dc..5886a29 100644 (file)
@@ -13,6 +13,7 @@ namespace {
 Params params[] = {
     std::tuple<Device, Config>{ CommonTestUtils::DEVICE_GPU, { { CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES) }}},
     std::tuple<Device, Config>{ CommonTestUtils::DEVICE_GPU, { { CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(NO) }}},
+    std::tuple<Device, Config>{ CommonTestUtils::DEVICE_GPU, { { CONFIG_KEY(CACHE_DIR), "cache" }}},
 };
 
 }  // namespace
index c6e8e6a..0452c24 100644 (file)
@@ -6,8 +6,18 @@
 #include <fstream>
 #include <string>
 #include <vector>
+#include <sys/stat.h>
 
 #include "test_constants.hpp"
+#include "w_dirent.h"
+#include "common_utils.hpp"
+
+#ifdef _WIN32
+#include <direct.h>
+#define rmdir(dir) _rmdir(dir)
+#else  // _WIN32
+#include <unistd.h>
+#endif  // _WIN32
 
 namespace CommonTestUtils {
 
@@ -62,4 +72,47 @@ inline void removeIRFiles(const std::string &xmlFilePath, const std::string &bin
         std::remove(binFileName.c_str());
     }
 }
+
+// Removes all files with extension=ext from the given directory
+// Return value:
+// < 0 - error
+// >= 0 - count of removed files
+inline int removeFilesWithExt(std::string path, std::string ext) {
+    struct dirent *ent;
+    DIR *dir = opendir(path.c_str());
+    int ret = 0;
+    if (dir != nullptr) {
+        while ((ent = readdir(dir)) != NULL) {
+            auto file = makePath(path, std::string(ent->d_name));
+            struct stat stat_path;
+            stat(file.c_str(), &stat_path);
+            if (!S_ISDIR(stat_path.st_mode) && endsWith(file, "." + ext)) {
+                auto err = std::remove(file.c_str());
+                if (err != 0) {
+                    closedir(dir);
+                    return err;
+                }
+                ret++;
+            }
+        }
+        closedir(dir);
+    }
+
+    return ret;
+}
+
+inline int removeDir(const std::string &path) {
+    return rmdir(path.c_str());
+}
+
+inline bool directoryExists(const std::string &path) {
+    struct stat sb;
+
+    if (stat(path.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
+        return true;
+    }
+
+    return false;
+}
+
 }  // namespace CommonTestUtils
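
The new helpers combine naturally into the cleanup pattern used by the GPU cache test above; a hypothetical helper, with the name chosen purely for illustration:

    #include <string>
    #include "common_test_utils/file_utils.hpp"

    // Remove *.cl_cache files and then the (now empty) cache directory, if it exists.
    inline void cleanupKernelCacheDir(const std::string &cache_path) {
        if (CommonTestUtils::directoryExists(cache_path)) {
            CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache");
            CommonTestUtils::removeDir(cache_path);
        }
    }
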
index 3a13d71..44a4523 100644 (file)
@@ -10,6 +10,8 @@
 #include <algorithm>
 
 #include <file_utils.h>
+#include "common_utils.hpp"
+#include "w_dirent.h"
 
 #ifdef ENABLE_UNICODE_PATH_SUPPORT
 namespace CommonTestUtils {
@@ -71,6 +73,88 @@ static void removeFile(std::wstring path) {
     }
 }
 
+inline bool endsWith(const std::wstring& source, const std::wstring& expectedSuffix) {
+    return expectedSuffix.size() <= source.size() && source.compare(source.size() - expectedSuffix.size(), expectedSuffix.size(), expectedSuffix) == 0;
+}
+
+// Removes all files with extension=ext from the given directory
+// Return value:
+// < 0 - error
+// >= 0 - count of removed files
+inline int removeFilesWithExt(std::wstring path, std::wstring ext) {
+    int ret = 0;
+#ifdef _WIN32
+    struct _wdirent *ent;
+    _WDIR *dir = _wopendir(path.c_str());
+    if (dir != nullptr) {
+        while ((ent = _wreaddir(dir)) != NULL) {
+            auto file = ::FileUtils::makePath(path, std::wstring(ent->wd_name));
+            struct _stat64i32 stat_path;
+            _wstat(file.c_str(), &stat_path);
+            if (!S_ISDIR(stat_path.st_mode) && endsWith(file, L"." + ext)) {
+                auto err = _wremove(file.c_str());
+                if (err != 0) {
+                    _wclosedir(dir);
+                    return err;
+                }
+                ret++;
+            }
+        }
+        _wclosedir(dir);
+    }
+#else
+    struct dirent *ent;
+    auto path_mb = FileUtils::wStringtoMBCSstringChar(path);
+    auto ext_mb = FileUtils::wStringtoMBCSstringChar(ext);
+    DIR *dir = opendir(path_mb.c_str());
+    if (dir != nullptr) {
+        while ((ent = readdir(dir)) != NULL) {
+            std::string file = ::FileUtils::makePath(path_mb, std::string(ent->d_name));
+            struct stat stat_path;
+            stat(file.c_str(), &stat_path);
+            if (!S_ISDIR(stat_path.st_mode) && ::CommonTestUtils::endsWith(file, "." + ext_mb)) {
+                auto err = std::remove(file.c_str());
+                if (err != 0) {
+                    closedir(dir);
+                    return err;
+                }
+                ret++;
+            }
+        }
+        closedir(dir);
+    }
+#endif
+    return ret;
+}
+
+static int removeDir(std::wstring path) {
+    int result = 0;
+    if (!path.empty()) {
+#ifdef _WIN32
+        result = _wrmdir(path.c_str());
+#else
+        result = rmdir(FileUtils::wStringtoMBCSstringChar(path).c_str());
+#endif
+    }
+    return result;
+}
+
+inline bool directoryExists(const std::wstring &path) {
+#ifdef _WIN32
+    struct _stat64i32 sb;
+    if (_wstat(path.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
+        return true;
+    }
+#else
+    struct stat sb;
+    if (stat(FileUtils::wStringtoMBCSstringChar(path).c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
+        return true;
+    }
+#endif
+
+    return false;
+}
+
 static const std::vector<std::wstring> test_unicode_postfix_vector = {
         L"unicode_Яㅎあ",
         L"ひらがな日本語",
@@ -83,4 +167,4 @@ static const std::vector<std::wstring> test_unicode_postfix_vector = {
 };
 
 }  // namespace CommonTestUtils
-#endif  // ENABLE_UNICODE_PATH_SUPPORT
\ No newline at end of file
+#endif  // ENABLE_UNICODE_PATH_SUPPORT
diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/w_dirent.h b/inference-engine/tests/ie_test_utils/common_test_utils/w_dirent.h
new file mode 100644 (file)
index 0000000..9338926
--- /dev/null
@@ -0,0 +1,227 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#if defined(_WIN32)
+
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN_UNDEF
+#endif
+
+#ifndef NOMINMAX
+# define NOMINMAX
+# define NOMINMAX_UNDEF
+#endif
+
+#if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_)
+# define _X86_
+#endif
+
+#if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_)
+# define _AMD64_
+#endif
+
+#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM_
+#endif
+
+#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM64_
+#endif
+
+#include <string>
+#include <windef.h>
+#include <fileapi.h>
+#include <Winbase.h>
+#include <sys/stat.h>
+
+// Copied from linux libc sys/stat.h:
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+
+struct dirent {
+    char *d_name;
+
+    explicit dirent(const wchar_t *wsFilePath) {
+        size_t i;
+        auto slen = wcslen(wsFilePath);
+        d_name = static_cast<char *>(malloc(slen + 1));
+        wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen);
+    }
+    ~dirent() {
+        free(d_name);
+    }
+};
+
+class DIR {
+    WIN32_FIND_DATAA FindFileData;
+    HANDLE hFind;
+    dirent *next;
+
+    static inline bool endsWith(const std::string &src, const char *with) {
+        int wl = static_cast<int>(strlen(with));
+        int so = static_cast<int>(src.length()) - wl;
+        if (so < 0) return false;
+        return 0 == strncmp(with, &src[so], wl);
+    }
+
+public:
+    DIR(const DIR &other) = delete;
+    DIR(DIR &&other) = delete;
+    DIR& operator=(const DIR &other) = delete;
+    DIR& operator=(DIR &&other) = delete;
+
+    explicit DIR(const char *dirPath) : next(nullptr) {
+        std::string ws = dirPath;
+        if (endsWith(ws, "\\"))
+            ws += "*";
+        else
+            ws += "\\*";
+        hFind = FindFirstFileA(ws.c_str(), &FindFileData);
+        FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
+    }
+
+    ~DIR() {
+        delete next;
+        next = nullptr;
+        FindClose(hFind);
+    }
+
+    bool isValid() const {
+        return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
+    }
+
+    dirent* nextEnt() {
+        if (next != nullptr) delete next;
+        next = nullptr;
+
+        if (!FindFileData.dwReserved0) return nullptr;
+
+        wchar_t wbuf[4096];
+
+        size_t outSize;
+        mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094);
+        next = new dirent(wbuf);
+        FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData);
+        return next;
+    }
+};
+
+struct _wdirent {
+    wchar_t *wd_name;
+
+    explicit _wdirent(const wchar_t *wsFilePath) {
+        auto slen = wcslen(wsFilePath);
+        wd_name = static_cast<wchar_t *>(malloc(sizeof(wchar_t) * (slen + 1)));
+        wcscpy_s(wd_name, slen + 1, wsFilePath);
+    }
+    ~_wdirent() {
+        free(wd_name);
+    }
+};
+
+class _WDIR {
+    WIN32_FIND_DATAW FindFileData;
+    HANDLE hFind;
+    _wdirent *next;
+
+    static inline bool endsWith(const std::wstring &src, const wchar_t *with) {
+        int wl = static_cast<int>(wcslen(with));
+        int so = static_cast<int>(src.length()) - wl;
+        if (so < 0) return false;
+        return 0 == wcsncmp(with, &src[so], wl);
+    }
+
+public:
+    _WDIR(const _WDIR &other) = delete;
+    _WDIR(_WDIR &&other) = delete;
+    _WDIR& operator=(const _WDIR &other) = delete;
+    _WDIR& operator=(_WDIR &&other) = delete;
+
+    explicit _WDIR(const wchar_t *dirPath) : next(nullptr) {
+        std::wstring ws = dirPath;
+        if (endsWith(ws, L"\\"))
+            ws += L"*";
+        else
+            ws += L"\\*";
+        hFind = FindFirstFileW(ws.c_str(), &FindFileData);
+        FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
+    }
+
+    ~_WDIR() {
+        delete next;
+        next = nullptr;
+        FindClose(hFind);
+    }
+
+    bool isValid() const {
+        return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
+    }
+
+    _wdirent* nextEnt() {
+        if (next != nullptr) delete next;
+        next = nullptr;
+
+        if (!FindFileData.dwReserved0) return nullptr;
+
+        std::wstring buf(FindFileData.cFileName);
+        next = new _wdirent(buf.c_str());
+        FindFileData.dwReserved0 = FindNextFileW(hFind, &FindFileData);
+        return next;
+    }
+};
+
+
+static DIR* opendir(const char *dirPath) {
+    auto dp = new DIR(dirPath);
+    if (!dp->isValid()) {
+        delete dp;
+        return nullptr;
+    }
+    return dp;
+}
+
+static _WDIR* _wopendir(const wchar_t *dirPath) {
+    auto dp = new _WDIR(dirPath);
+    if (!dp->isValid()) {
+        delete dp;
+        return nullptr;
+    }
+    return dp;
+}
+
+static struct dirent* readdir(DIR *dp) {
+    return dp->nextEnt();
+}
+
+static struct _wdirent* _wreaddir(_WDIR *dp) {
+    return dp->nextEnt();
+}
+
+static void closedir(DIR *dp) {
+    delete dp;
+}
+
+static void _wclosedir(_WDIR *dp) {
+    delete dp;
+}
+
+#ifdef WIN32_LEAN_AND_MEAN_UNDEF
+# undef WIN32_LEAN_AND_MEAN
+# undef WIN32_LEAN_AND_MEAN_UNDEF
+#endif
+
+#ifdef NOMINMAX_UNDEF
+# undef NOMINMAX_UNDEF
+# undef NOMINMAX
+#endif
+
+#else
+
+#include <sys/types.h>
+#include <dirent.h>
+
+#endif
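
A minimal sketch of using the shim above to enumerate directory entries in a platform-neutral way (the listDir name is illustrative, not part of the patch):

    #include <string>
    #include <vector>
    #include "common_test_utils/w_dirent.h"

    std::vector<std::string> listDir(const std::string &path) {
        std::vector<std::string> names;
        DIR *dir = opendir(path.c_str());  // on Windows this is the DIR class defined above
        if (dir != nullptr) {
            struct dirent *ent;
            while ((ent = readdir(dir)) != nullptr) {
                names.emplace_back(ent->d_name);
            }
            closedir(dir);
        }
        return names;
    }
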
index 2f16009..9942aa9 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -69,10 +69,11 @@ struct engine_configuration {
     const throttle_mode_types throttle_mode;  ///< Throttle mode (support of throttle hints in command queue). If cl_khr_throttle_hints extension
                                               ///< is not supported by current OpenCL implementation, the value must be set to cldnn_throttle_disabled.
 
-    bool enable_memory_pool;              ///< Enables memory usage optimization. memory objects will be reused when possible
-                                          ///< (switched off for older drivers then NEO).
-    uint16_t n_streams;                   ///< Number of queues executed in parallel
-    const std::string tuning_cache_path;  ///< Path to tuning kernel cache
+    bool enable_memory_pool;                  ///< Enables memory usage optimization. Memory objects will be reused when possible
+                                              ///< (switched off for drivers older than NEO).
+    uint16_t n_streams;                       ///< Number of queues executed in parallel
+    const std::string kernels_cache_path;     ///< Path to compiled kernels cache
+    const std::string tuning_cache_path;      ///< Path to tuning kernel cache
 
     /// @brief Constructs engine configuration with specified options.
     /// @param profiling Enable per-primitive profiling.
@@ -93,6 +94,7 @@ struct engine_configuration {
         throttle_mode_types throttle_mode = throttle_mode_types::disabled,
         bool memory_pool = true,
         uint16_t n_streams = 1,
+        const std::string& kernels_cache_path = "",
         const std::string& tuning_cache_path = "cache.json")
         : enable_profiling(profiling)
         , meaningful_kernels_names(decorate_kernel_names)
@@ -106,6 +108,7 @@ struct engine_configuration {
         , throttle_mode(throttle_mode)
         , enable_memory_pool(memory_pool)
         , n_streams(n_streams)
+        , kernels_cache_path(kernels_cache_path)
         , tuning_cache_path(tuning_cache_path) {
         if (n_streams == 0) {
             throw std::invalid_argument("Invalid streams count set in engine config");
index 087aa47..8252271 100644 (file)
@@ -66,6 +66,8 @@ enum class build_option_type {
 
     /// @brief Specifies a directory to which stages of network compilation should be dumped. (default: empty, i.e. no dumping)
     graph_dumps_dir,
+    /// @brief Specifies a directory to which compiled kernels should be cached, or from which they can be loaded. (default: empty, i.e. no caching)
+    kernels_cache_dir,
     /// @brief Name for serialization process
     serialize_network,
     load_program,
@@ -146,6 +148,9 @@ struct build_option {
     /// @brief Specifies a directory to which stages of network compilation should be dumped (default: empty, i.e. no dumping)
     static std::shared_ptr<const build_option> graph_dumps_dir(const std::string& dir_path);
 
+    /// @brief Specifies a directory to which compiled kernels should be cached, or from which they can be loaded. (default: empty, i.e. no caching)
+    static std::shared_ptr<const build_option> kernels_cache_dir(const std::string& dir_path);
+
     /// @brief Specifies a name for serialization process.
     static std::shared_ptr<const build_option> serialize_network(const std::string& network_name);
     /// @brief Specifies a name of load_program process.
@@ -251,6 +256,21 @@ private:
     build_option_directory& operator=(const build_option_directory& other) = delete;
 };
 
+/// @brief @ref build_option specialization for specifying the compiled kernels cache directory.
+template <build_option_type OptType>
+struct build_option_kernels_cache_dir : build_option {
+    const std::string directory_path;
+
+    explicit build_option_kernels_cache_dir(const std::string& dir_path) : directory_path(dir_path) {}
+
+private:
+    /// @brief Returns build_option_type::kernels_cache_dir.
+    build_option_type get_type() const override { return build_option_type::kernels_cache_dir; }
+
+    build_option_kernels_cache_dir(const build_option_kernels_cache_dir& other) = delete;
+    build_option_kernels_cache_dir& operator=(const build_option_kernels_cache_dir& other) = delete;
+};
+
 /// @brief @ref build_option specialization for serialization process.
 template <build_option_type OptType>
 struct build_option_serialization : build_option {
@@ -342,6 +362,11 @@ struct build_option_traits<build_option_type::graph_dumps_dir> {
     static std::shared_ptr<const build_option> make_default() { return build_option::graph_dumps_dir({}); }
 };
 template <>
+struct build_option_traits<build_option_type::kernels_cache_dir> {
+    typedef build_option_directory<build_option_type::kernels_cache_dir> object_type;
+    static std::shared_ptr<const build_option> make_default() { return build_option::kernels_cache_dir({}); }
+};
+template <>
 struct build_option_traits<build_option_type::serialize_network> {
     typedef build_option_serialization<build_option_type::serialize_network> object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::serialize_network({}); }
@@ -392,6 +417,10 @@ inline std::shared_ptr<const build_option> build_option::tuning_config(const tun
 inline std::shared_ptr<const build_option> build_option::graph_dumps_dir(const std::string& dir_path) {
     return std::make_shared<build_option_directory<build_option_type::graph_dumps_dir>>(dir_path);
 }
+
+inline std::shared_ptr<const build_option> build_option::kernels_cache_dir(const std::string& dir_path) {
+    return std::make_shared<build_option_directory<build_option_type::kernels_cache_dir>>(dir_path);
+}
 inline std::shared_ptr<const build_option> build_option::serialize_network(const std::string& name) {
     return std::make_shared<build_option_serialization<build_option_type::serialize_network>>(name);
 }
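
On the clDNN API side the directory is expected to be forwarded through build options; a hedged sketch of how a caller might set it, following the existing graph_dumps_dir pattern (the include path and helper name are assumptions, not taken from this patch):

    #include <string>
    #include <api/program.hpp>

    cldnn::build_options make_build_options(const std::string &cache_dir) {
        cldnn::build_options options;
        // cache_dir is an arbitrary directory chosen by the caller.
        options.set_option(cldnn::build_option::kernels_cache_dir(cache_dir));
        return options;
    }
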
index ac89a73..379e8db 100644 (file)
@@ -93,6 +93,7 @@ gpu_toolkit_config convert_configuration(const engine_configuration conf) {
     result.priority_mode = conf.priority_mode;
     result.throttle_mode = conf.throttle_mode;
     result.queues_num = conf.n_streams;
+    result.kernels_cache_path = conf.kernels_cache_path;
     result.tuning_cache_path = conf.tuning_cache_path;
     return result;
 }
index d174da0..052a133 100644 (file)
@@ -33,6 +33,7 @@ configuration::configuration()
       priority_mode(priority_mode_types::disabled),
       throttle_mode(throttle_mode_types::disabled),
       queues_num(0),
-      tuning_cache_path("cache.json") {}
+      tuning_cache_path("cache.json"),
+      kernels_cache_path("") {}
 }  // namespace gpu
 }  // namespace cldnn
index ecb0d0e..cf402ba 100644 (file)
@@ -42,6 +42,7 @@ struct configuration {
     throttle_mode_types throttle_mode;
     uint16_t queues_num;
     std::string tuning_cache_path;
+    std::string kernels_cache_path;
 };
 }  // namespace gpu
 }  // namespace cldnn
index c0a5a10..5e210e2 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 
 #include "kernel_selector_helper.h"
 
-#define MAX_KERNELS_PER_PROGRAM 10
-
-namespace cldnn {
-namespace gpu {
+#ifndef ENABLE_UNICODE_PATH_SUPPORT
+# ifdef _WIN32
+#  if defined __INTEL_COMPILER || defined _MSC_VER
+#   define ENABLE_UNICODE_PATH_SUPPORT
+#  endif
+# elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2)) || defined(__clang__)
+#  define ENABLE_UNICODE_PATH_SUPPORT
+# endif
+#endif
+
+#ifndef _WIN32
+#ifdef ENABLE_UNICODE_PATH_SUPPORT
+#include <locale>
+#include <codecvt>
+#endif
+#else
+#include <Windows.h>
+#endif
 
 namespace {
-std::string get_undef_jit(kernels_cache::source_code org_source_code) {
+
+std::mutex cacheAccessMutex;
+
+#ifdef ENABLE_UNICODE_PATH_SUPPORT
+std::wstring multiByteCharToWString(const char* str) {
+#ifdef _WIN32
+    int strSize = static_cast<int>(std::strlen(str));
+    int size_needed = MultiByteToWideChar(CP_UTF8, 0, str, strSize, NULL, 0);
+    std::wstring wstrTo(size_needed, 0);
+    MultiByteToWideChar(CP_UTF8, 0, str, strSize, &wstrTo[0], size_needed);
+    return wstrTo;
+#else
+    std::wstring_convert<std::codecvt_utf8<wchar_t>> wstring_encoder;
+    std::wstring result = wstring_encoder.from_bytes(str);
+    return result;
+#endif  // _WIN32
+}
+#endif  // ENABLE_UNICODE_PATH_SUPPORT
+
+static std::vector<unsigned char> loadBinaryFromFile(std::string path) {
+    std::lock_guard<std::mutex> lock(cacheAccessMutex);
+
+#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+    std::wstring widefilename = multiByteCharToWString(path.c_str());
+    const wchar_t* filename = widefilename.c_str();
+    FILE *fp = _wfopen(filename, L"rb");
+#else
+    const char* filename = path.c_str();
+    FILE *fp = fopen(filename, "rb");
+#endif
+
+    if (fp) {
+        fseek(fp, 0, SEEK_END);
+        size_t nsize = (size_t)ftell(fp);
+
+        fseek(fp, 0, SEEK_SET);
+
+        std::vector<unsigned char> ret(nsize);
+
+        auto res = fread(ret.data(), sizeof(unsigned char), nsize, fp);
+        (void)res;
+        fclose(fp);
+        return ret;
+    }
+
+    return {};
+}
+
+static void saveBinaryToFile(std::string path, const std::vector<unsigned char> buffer) {
+    std::lock_guard<std::mutex> lock(cacheAccessMutex);
+#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+    std::wstring widefilename = multiByteCharToWString(path.c_str());
+    const wchar_t* filename = widefilename.c_str();
+#else
+    const char* filename = path.c_str();
+#endif
+    std::ofstream out_file(filename, std::ios::out | std::ios::binary);
+    if (out_file.is_open()) {
+        out_file.write((char*)&buffer[0], buffer.size());
+    }
+}
+
+std::string get_undef_jit(cldnn::gpu::kernels_cache::source_code org_source_code) {
     const std::string white_space_with_new_lines = " \t\r\n";
     const std::string white_space = " \t";
 
@@ -99,13 +175,39 @@ std::string reorder_options(const std::string& org_options) {
 inline bool does_options_support_batch_compilation(const std::string& options) {
     return options.find("-D") == std::string::npos && options.find("-I") == std::string::npos;
 }
+
 }  // namespace
 
+namespace cldnn {
+namespace gpu {
+
+std::string kernels_cache::get_cache_path() const {
+    auto path = _context.get_configuration().kernels_cache_path;
+    if (path.empty()) {
+        return {};
+    }
+
+    if (path.back() != '/' && path.back() != '\\') {
+        path += "/";
+    }
+    return path;
+}
+
+bool kernels_cache::is_cache_enabled() const {
+    return !_context.get_configuration().kernels_cache_path.empty();
+}
+
+size_t kernels_cache::get_max_kernels_per_batch() const {
+    return 10;
+}
+
 kernels_cache::sorted_code kernels_cache::get_program_source(const kernels_code& kernels_source_code) const {
     sorted_code scode;
 
     for (const auto& code : kernels_source_code) {
-        const source_code org_source_code = {code.kernel_strings->jit, code.kernel_strings->str};
+        std::string full_code = code.kernel_strings->jit + code.kernel_strings->str;
+        full_code += get_undef_jit({full_code});
+        const source_code org_source_code = { full_code };
         std::string entry_point = code.kernel_strings->entry_point;
         std::string options = code.kernel_strings->options;
         bool batch_compilation = code.kernel_strings->batch_compilation;
@@ -140,23 +242,33 @@ kernels_cache::sorted_code kernels_cache::get_program_source(const kernels_code&
             current_bucket.options = options;
         }
 
-        if ((current_bucket.kernels_counter % MAX_KERNELS_PER_PROGRAM) == 0) {
+        // Create new kernels bucket when the limit is reached
+        if ((current_bucket.kernels_counter % get_max_kernels_per_batch()) == 0) {
             current_bucket.source.push_back({});
         }
 
         current_bucket.entry_point_to_id[entry_point] = code.id;
+        assert(org_source_code.size() == 1);
 
-        source_code new_source_code = org_source_code;
+        current_bucket.source.back().push_back(std::move(org_source_code.front()));
 
-        if (batch_compilation) {
-            new_source_code.push_back(get_undef_jit(org_source_code));
-        }
+        current_bucket.kernels_counter++;
+    }
 
-        for (auto& s : new_source_code) {
-            current_bucket.source.back().push_back(std::move(s));
+    // Compute hash value for each bucket
+    // Hash calculation might require additional optimizations, but currently execution time of this part is much smaller than loading
+    // of the precompiled binaries or get_undef_jit calls
+    // Hash is computed for string that contains compilation options + driver version +
+    // full source code (jit + template + undef sections) of all kernels in the bucket
+    for (auto& c : scode) {
+        program_code& code = c.second;
+        auto options = c.first;
+        for (size_t i = 0; i < code.source.size(); i++) {
+            std::string full_code = options + " " + _context.get_device_info().driver_version;
+            for (auto& ss : code.source[i])
+                full_code += ss;
+            code.hash_values.push_back(std::hash<std::string>()(full_code));
         }
-
-        current_bucket.kernels_counter++;
     }
 
     return scode;
@@ -183,11 +295,26 @@ kernels_cache::kernel_id kernels_cache::set_kernel_source(
     return id;
 }
 
+static std::vector<unsigned char> getProgramBinaries(cl::Program program) {
+    // Get the size of the program binary in bytes.
+    std::vector<size_t> binary_sizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>();
+
+    if (binary_sizes.size() != 1)
+        throw std::runtime_error("Invalid binaries count");
+
+    size_t binary_size = binary_sizes.front();
+    // Binary is not available for the device.
+    if (binary_size == 0)
+        throw std::runtime_error("Binary is not avaliable after program build");
+
+    // Get program binary.
+    return program.getInfo<CL_PROGRAM_BINARIES>().front();
+}
+
 kernels_cache::kernels_map kernels_cache::build_program(const program_code& program_source) const {
     static uint32_t current_file_index = 0;
 
-    bool dump_sources =
-        !_context.get_configuration().ocl_sources_dumps_dir.empty() || program_source.dump_custom_program;
+    bool dump_sources = !_context.get_configuration().ocl_sources_dumps_dir.empty() || program_source.dump_custom_program;
 
     std::string dump_file_name = "";
     if (dump_sources) {
@@ -204,7 +331,19 @@ kernels_cache::kernels_map kernels_cache::build_program(const program_code& prog
                               // failed to compile)
 
         uint32_t part_idx = 0;
-        for (const auto& sources : program_source.source) {
+        for (size_t i = 0; i < program_source.source.size(); i++) {
+            auto sources_bucket_to_compile = program_source.source[i];
+            const auto& hash_value = program_source.hash_values[i];
+            std::string cached_bin_name = get_cache_path() + std::to_string(hash_value) + ".cl_cache";
+            cl::Program::Binaries precompiled_kernels = {};
+            if (is_cache_enabled()) {
+                // Try to load the file named ${hash_value}.cl_cache, which contains the precompiled kernels for the current bucket
+                // If the read is successful, the bucket is rebuilt from this binary instead of being recompiled from source
+                auto bin = loadBinaryFromFile(cached_bin_name);
+                if (!bin.empty()) {
+                    precompiled_kernels.push_back(bin);
+                }
+            }
             auto current_dump_file_name = dump_file_name + std::to_string(part_idx++) + ".cl";
             std::ofstream dump_file;
 
@@ -212,23 +351,39 @@ kernels_cache::kernels_map kernels_cache::build_program(const program_code& prog
                 dump_file.open(current_dump_file_name);
 
                 if (dump_file.good()) {
-                    for (auto& s : sources) dump_file << s;
+                    for (auto& s : sources_bucket_to_compile)
+                        dump_file << s;
                 }
             }
 
             try {
-                cl::Program program(_context.context(), sources);
-                program.build({_context.device()}, program_source.options.c_str());
+                cl::vector<cl::Kernel> kernels;
+                // Run compilation
+                if (precompiled_kernels.empty()) {
+                    cl::Program program(_context.context(), sources_bucket_to_compile);
+                    program.build({_context.device()}, program_source.options.c_str());
 
-                if (dump_sources && dump_file.good()) {
-                    dump_file << "\n/* Build Log:\n";
-                    for (auto& p : program.getBuildInfo<CL_PROGRAM_BUILD_LOG>()) dump_file << p.second << "\n";
+                    if (dump_sources && dump_file.good()) {
+                        dump_file << "\n/* Build Log:\n";
+                        for (auto& p : program.getBuildInfo<CL_PROGRAM_BUILD_LOG>())
+                            dump_file << p.second << "\n";
 
-                    dump_file << "*/\n";
-                }
+                        dump_file << "*/\n";
+                    }
 
-                cl::vector<cl::Kernel> kernels;
-                program.createKernels(&kernels);
+                    program.createKernels(&kernels);
+                    if (is_cache_enabled()) {
+                        // If kernels caching is enabled, save the compiled bucket to a binary file named ${code_hash_value}.cl_cache
+                        // Note: the bin file contains the full bucket, not separate kernels, so kernel reuse across different models is quite limited
+                        // Bucket size can be changed in the get_max_kernels_per_batch() method, but forcing it to 1 will lead to much longer
+                        // compile time.
+                        saveBinaryToFile(cached_bin_name, getProgramBinaries(program));
+                    }
+                } else {
+                    cl::Program program(_context.context(), {_context.device()}, precompiled_kernels);
+                    program.build({_context.device()}, program_source.options.c_str());
+                    program.createKernels(&kernels);
+                }
 
                 for (auto& k : kernels) {
                     auto kernel_name = k.getInfo<CL_KERNEL_FUNCTION_NAME>();
index e9667ac..234efee 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -45,6 +45,7 @@ public:
 
     struct program_code {
         std::vector<source_code> source;
+        std::vector<size_t> hash_values;
         uint32_t kernels_counter = 0;
         std::string options;
         bool dump_custom_program = false;
@@ -96,6 +97,9 @@ private:
     sorted_code get_program_source(const kernels_code& kernels_source_code) const;
     kernels_map build_program(const program_code& pcode) const;
 
+    std::string get_cache_path() const;
+    bool is_cache_enabled() const;
+    size_t get_max_kernels_per_batch() const;
 public:
     explicit kernels_cache(gpu_toolkit& context, uint32_t prog_id);
     kernel_id set_kernel_source(const std::shared_ptr<kernel_selector::kernel_string>& kernel_string,