From: Inki Dae Date: Wed, 18 Mar 2020 06:03:38 +0000 (+0900) Subject: Introduce integrated CL kernel cache feature X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=eaedac58a3780aa57f9f890d16b63cefade714fb;p=platform%2Fupstream%2Farmcl.git Introduce integrated CL kernel cache feature This patch introduces an integrated CL kernel cache feature for NN runtimes such as ARMNN or other in-house versions. Originally, this feature came from the utils/Utils.cpp and utils/Utils.h files but was used just for testing. Caching CL kernel binaries is mandatory for a real product because inference performance at first run is one of the most important factors. This patch exposes two APIs - save_program_cache_to_file and restore_program_cache_from_file - so that NN runtimes can use them to cache the compiled CL kernel binaries. In addition, it adds a new map object, _new_built_programs_map, to manage only newly compiled CL kernel binaries. With this patch, already compiled kernel binaries will be restored from the existing map object, _built_programs_map, and newly compiled kernel binaries will be added to _new_built_programs_map, and then saved to a given file. However, loading the CL kernel binaries from a given file can add latency when many binary pieces - CL caches - are stacked in that file, so they may be separated into different files, depending on the NN runtime policy. Changelog v1. - Reordered including headers suggested by Manuel Bottini. - Updated description. Changelog v2. - Drop function descriptions. Changelog v3. - Fix build error with opencl = 0. To do this, it moves the two functions, save_program_cache_to_file and restore_program_cache_from_file, to src/runtime/CL/Utils.cpp and arm_compute/runtime/CL/Utils.h, and sets the ARM_COMPUTE_CL flag only when opencl = 1. This change makes the CL kernel cache feature depend on OpenCL support. 
Change-Id: I8c640fb1809ca79191daf5b1ff72a50dd492c36d Signed-off-by: Inki Dae --- diff --git a/SConscript b/SConscript index 33e8f77fe..d5b4b15c3 100644 --- a/SConscript +++ b/SConscript @@ -192,6 +192,7 @@ if env['opencl']: core_files += Glob('src/core/CL/gemm/reshaped/*.cpp') core_files += Glob('src/core/CL/gemm/reshaped_only_rhs/*.cpp') + arm_compute_env.Append(CPPDEFINES=['ARM_COMPUTE_CL']) runtime_files += Glob('src/runtime/CL/*.cpp') runtime_files += Glob('src/runtime/CL/functions/*.cpp') runtime_files += Glob('src/runtime/CL/tuners/*.cpp') diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h index 4f5aa76a0..95bdd0afe 100644 --- a/arm_compute/core/CL/CLKernelLibrary.h +++ b/arm_compute/core/CL/CLKernelLibrary.h @@ -285,6 +285,9 @@ public: /** Access the cache of built OpenCL programs */ const std::map &get_built_programs() const; + /** Access the cache of new built OpenCL programs */ + std::map &get_new_built_programs(); + /** Add a new built program to the cache * * @param[in] built_program_name Name of the program @@ -323,6 +326,7 @@ private: std::string _kernel_path; /**< Path to the kernels folder. */ mutable std::map _programs_map; /**< Map with all already loaded program data. */ mutable std::map _built_programs_map; /**< Map with all already built program data. */ + mutable std::map _new_built_programs_map; /**< Map with new built program data. */ static const std::map _kernel_program_map; /**< Map that associates kernel names with programs. */ static const std::map _program_source_map; /**< Contains sources for all programs. Used for compile-time kernel inclusion. >*/ diff --git a/arm_compute/runtime/CL/Utils.h b/arm_compute/runtime/CL/Utils.h new file mode 100644 index 000000000..712c09903 --- /dev/null +++ b/arm_compute/runtime/CL/Utils.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017-2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CORE_CL_UTILS_H +#define ARM_COMPUTE_CORE_CL_UTILS_H + +#include + +namespace arm_compute +{ +/** This function saves opencl kernels library to a file + * + * @param[in] filename Name of the file to be used to save the library + */ +void save_program_cache_to_file(const std::string &filename = "cache.bin"); + +/** This function loads prebuilt opencl kernels from a file + * + * @param[in] filename Name of the file to be used to load the kernels + */ +void restore_program_cache_from_file(const std::string &filename = "cache.bin"); +} // namespace arm_compute +#endif /* ARM_COMPUTE_CORE_CL_UTILS_H */ diff --git a/examples/cl_cache.cpp b/examples/cl_cache.cpp index 7d8a51542..34419bb3e 100644 --- a/examples/cl_cache.cpp +++ b/examples/cl_cache.cpp @@ -26,6 +26,9 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#ifdef ARM_COMPUTE_CL +#include "arm_compute/runtime/CL/Utils.h" +#endif #include "utils/Utils.h" using namespace arm_compute; @@ -51,9 +54,11 @@ public: std::transform(argv1.begin(), argv1.end(), argv1.begin(), ::tolower); if(argv1 == "--restore_cache") { +#ifdef ARM_COMPUTE_CL // Load the precompiled kernels from a file into the kernel library, in this way the next time they are needed // compilation won't be required. restore_program_cache_from_file(); +#endif } else { @@ -86,8 +91,10 @@ public: permute_nchw.configure(&tensor_nhwc, &tensor_nchw_result, vector_nhwc_to_nchw); tensor_nchw_result.allocator()->allocate(); +#ifdef ARM_COMPUTE_CL // Save the opencl kernels to a file save_program_cache_to_file(); +#endif return true; } diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp index 79d02f6ba..6861aeecd 100644 --- a/examples/graph_alexnet.cpp +++ b/examples/graph_alexnet.cpp @@ -22,11 +22,15 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" +#ifdef ARM_COMPUTE_CL +#include "arm_compute/runtime/CL/Utils.h" +#endif #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" +using namespace arm_compute; using namespace arm_compute::utils; using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; @@ -159,7 +163,9 @@ public: // compilation won't be required. if(common_params.enable_cl_cache) { +#ifdef ARM_COMPUTE_CL restore_program_cache_from_file(); +#endif } graph.finalize(common_params.target, config); @@ -167,7 +173,9 @@ public: // Save the opencl kernels to a file if(common_opts.enable_cl_cache) { +#ifdef ARM_COMPUTE_CL save_program_cache_to_file(); +#endif } return true; diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp index bac85eef1..071a2f6b0 100644 --- a/examples/graph_inception_v4.cpp +++ b/examples/graph_inception_v4.cpp @@ -22,11 +22,15 @@ * SOFTWARE. */ #include "arm_compute/graph.h" +#ifdef ARM_COMPUTE_CL +#include "arm_compute/runtime/CL/Utils.h" +#endif #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" +using namespace arm_compute; using namespace arm_compute::utils; using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; @@ -159,7 +163,9 @@ public: // compilation won't be required. 
if(common_params.enable_cl_cache) { +#ifdef ARM_COMPUTE_CL restore_program_cache_from_file(); +#endif } graph.finalize(common_params.target, config); @@ -167,7 +173,9 @@ public: // Save the opencl kernels to a file if(common_opts.enable_cl_cache) { +#ifdef ARM_COMPUTE_CL save_program_cache_to_file(); +#endif } return true; diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 5d5205439..5cd7e7145 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -1056,7 +1056,7 @@ const std::map CLKernelLibrary::_program_source_map = }; CLKernelLibrary::CLKernelLibrary() - : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map() + : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map(), _new_built_programs_map() { opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the CLKernelLibrary is built } @@ -1137,7 +1137,7 @@ Kernel CLKernelLibrary::create_kernel(const std::string &kernel_name, const Stri cl_program = program.build(build_options); // Add built program to internal map - _built_programs_map.emplace(built_program_name, cl_program); + _new_built_programs_map.emplace(built_program_name, cl_program); } // Create and return kernel @@ -1180,6 +1180,7 @@ void CLKernelLibrary::clear_programs_cache() { _programs_map.clear(); _built_programs_map.clear(); + _new_built_programs_map.clear(); } const std::map &CLKernelLibrary::get_built_programs() const @@ -1187,6 +1188,11 @@ const std::map &CLKernelLibrary::get_built_programs() return _built_programs_map; } +std::map &CLKernelLibrary::get_new_built_programs() +{ + return _new_built_programs_map; +} + void CLKernelLibrary::add_built_program(const std::string &built_program_name, const cl::Program &program) { _built_programs_map.emplace(built_program_name, program); diff --git a/src/runtime/CL/Utils.cpp b/src/runtime/CL/Utils.cpp new file mode 100644 index 000000000..3710152da --- /dev/null +++ 
b/src/runtime/CL/Utils.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include +#include +#include + +namespace arm_compute +{ +void restore_program_cache_from_file(const std::string &filename) +{ + std::ifstream cache_file(filename, std::ios::binary); + if(cache_file.is_open()) + { + if(!CLScheduler::get().is_initialised()) + { + arm_compute::CLScheduler::get().default_init(); + } + + while(!cache_file.eof()) + { + size_t name_len = 0; + size_t binary_len = 0; + cache_file.read(reinterpret_cast(&name_len), sizeof(size_t)); + cache_file.read(reinterpret_cast(&binary_len), sizeof(size_t)); + if(name_len == 0 || binary_len == 0) + { + break; + } + std::vector tmp(name_len); + std::vector binary(binary_len); + std::string name; + cache_file.read(tmp.data(), name_len); + name.assign(tmp.data(), name_len); + tmp.resize(binary_len); + cache_file.read(reinterpret_cast(binary.data()), binary_len); + cl::Context context = arm_compute::CLScheduler::get().context(); + cl::Program::Binaries binaries{ binary }; + std::vector devices = context.getInfo(); + cl::Program program(context, devices, binaries); + program.build(); + CLKernelLibrary::get().add_built_program(name, program); + } + cache_file.close(); + } +} + +void save_program_cache_to_file(const std::string &filename) +{ + if(CLScheduler::get().is_initialised()) + { + std::ofstream cache_file(filename, std::ios::binary | std::ios::app); + if(cache_file.is_open()) + { + for(const auto &it : CLKernelLibrary::get().get_new_built_programs()) + { + std::vector> binaries = it.second.getInfo(); + ARM_COMPUTE_ERROR_ON(binaries.size() != 1); + const std::string kernel_name = it.first; + size_t kernel_name_size = kernel_name.length(); + size_t binary_size = binaries[0].size(); + cache_file.write(reinterpret_cast(&kernel_name_size), sizeof(size_t)); + cache_file.write(reinterpret_cast(&binary_size), sizeof(size_t)); + cache_file.write(kernel_name.c_str(), kernel_name_size); + 
cache_file.write(reinterpret_cast(binaries[0].data()), binaries[0].size()); + } + cache_file.close(); + CLKernelLibrary::get().get_new_built_programs().clear(); + } + else + { + ARM_COMPUTE_ERROR("Cannot open cache file"); + } + } +} +} // namespace arm_compute diff --git a/utils/Utils.cpp b/utils/Utils.cpp index 80b47d767..c514cf6b8 100644 --- a/utils/Utils.cpp +++ b/utils/Utils.cpp @@ -275,86 +275,5 @@ uint64_t get_mem_free_from_meminfo() return 0; } -/** This function loads prebuilt opencl kernels from a file - * - * @param[in] filename Name of the file to be used to load the kernels - */ -void restore_program_cache_from_file(const std::string &filename) -{ -#ifdef ARM_COMPUTE_CL - std::ifstream cache_file(filename, std::ios::binary); - if(cache_file.is_open()) - { - if(!CLScheduler::get().is_initialised()) - { - arm_compute::CLScheduler::get().default_init(); - } - - while(!cache_file.eof()) - { - size_t name_len = 0; - size_t binary_len = 0; - cache_file.read(reinterpret_cast(&name_len), sizeof(size_t)); - cache_file.read(reinterpret_cast(&binary_len), sizeof(size_t)); - if(name_len == 0 || binary_len == 0) - { - break; - } - std::vector tmp(name_len); - std::vector binary(binary_len); - std::string name; - cache_file.read(tmp.data(), name_len); - name.assign(tmp.data(), name_len); - tmp.resize(binary_len); - cache_file.read(reinterpret_cast(binary.data()), binary_len); - cl::Context context = arm_compute::CLScheduler::get().context(); - cl::Program::Binaries binaries{ binary }; - std::vector devices = context.getInfo(); - cl::Program program(context, devices, binaries); - program.build(); - CLKernelLibrary::get().add_built_program(name, program); - } - cache_file.close(); - } -#else /* ARM_COMPUTE_CL */ - ARM_COMPUTE_UNUSED(filename); -#endif /* ARM_COMPUTE_CL */ -} - -/** This function saves opencl kernels library to a file - * - * @param[in] filename Name of the file to be used to save the library - */ -void save_program_cache_to_file(const std::string 
&filename) -{ -#ifdef ARM_COMPUTE_CL - if(CLScheduler::get().is_initialised()) - { - std::ofstream cache_file(filename, std::ios::binary); - if(cache_file.is_open()) - { - for(const auto &it : CLKernelLibrary::get().get_built_programs()) - { - std::vector> binaries = it.second.getInfo(); - ARM_COMPUTE_ERROR_ON(binaries.size() != 1); - const std::string kernel_name = it.first; - size_t kernel_name_size = kernel_name.length(); - size_t binary_size = binaries[0].size(); - cache_file.write(reinterpret_cast(&kernel_name_size), sizeof(size_t)); - cache_file.write(reinterpret_cast(&binary_size), sizeof(size_t)); - cache_file.write(kernel_name.c_str(), kernel_name_size); - cache_file.write(reinterpret_cast(binaries[0].data()), binaries[0].size()); - } - cache_file.close(); - } - else - { - ARM_COMPUTE_ERROR("Cannot open cache file"); - } - } -#else /* ARM_COMPUTE_CL */ - ARM_COMPUTE_UNUSED(filename); -#endif /* ARM_COMPUTE_CL */ -} } // namespace utils } // namespace arm_compute diff --git a/utils/Utils.h b/utils/Utils.h index 752271cc7..5b0d895ac 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -822,17 +822,6 @@ int compare_tensor(ITensor &tensor1, ITensor &tensor2, T tolerance) return num_mismatches; } -/** This function saves opencl kernels library to a file - * - * @param[in] filename Name of the file to be used to save the library - */ -void save_program_cache_to_file(const std::string &filename = "cache.bin"); - -/** This function loads prebuilt opencl kernels from a file - * - * @param[in] filename Name of the file to be used to load the kernels - */ -void restore_program_cache_from_file(const std::string &filename = "cache.bin"); } // namespace utils } // namespace arm_compute #endif /* __UTILS_UTILS_H__*/