2 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Copyright (c) 2016-2018 ARM Limited.
20 * SPDX-License-Identifier: MIT
22 * Permission is hereby granted, free of charge, to any person obtaining a copy
23 * of this software and associated documentation files (the "Software"), to
24 * deal in the Software without restriction, including without limitation the
25 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
26 * sell copies of the Software, and to permit persons to whom the Software is
27 * furnished to do so, subject to the following conditions:
29 * The above copyright notice and this permission notice shall be included in all
30 * copies or substantial portions of the Software.
32 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41 #include "arm_compute/core/CL/CLKernelLibrary.h"
42 #include "arm_compute/core/CL/CLKernelLibraryEx.h"
44 #include "arm_compute/core/CL/CLHelpers.h"
45 #include "arm_compute/core/Error.h"
46 #include "arm_compute/core/Utils.h"
54 using namespace arm_compute;
56 const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = {
57 // ARMComputeEx kernels
58 {"arg_min_max_ex_x", "arg_min_max_ex.cl"},
59 {"arg_min_max_ex_y", "arg_min_max_ex.cl"},
60 {"arg_min_max_ex_z", "arg_min_max_ex.cl"},
61 {"arg_min_max_ex_w", "arg_min_max_ex.cl"},
62 {"binary_logical_op", "binary_logical_op.cl"},
63 {"cast_bool", "cast.cl"},
64 {"embedding_lookup", "embedding_lookup.cl"},
65 {"gather_ex", "gather_ex.cl"},
66 {"gather_ex_1d", "gather_ex.cl"},
67 {"gather_ex_1d_out", "gather_ex.cl"},
68 {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
69 {"hashtable_lookup", "hashtable_lookup.cl"},
70 {"instance_normalization_ex", "instance_normalization_ex.cl"},
71 {"multiply_scale_factor", "multiply_scale_factor.cl"},
72 {"neg_tensor", "neg_tensor.cl"},
73 {"one_hot", "one_hot.cl"},
74 {"one_hot_only_on_value", "one_hot.cl"},
75 {"quantization_symm8", "quantization_symm8.cl"},
76 {"reduce_min_max", "reduce_operation.cl"},
77 {"reduce_sum_mean", "reduce_operation.cl"},
78 {"topkv2_init", "topkv2.cl"},
79 {"topkv2_find_first_negative", "topkv2.cl"},
80 {"topkv2_reorder_negatives", "topkv2.cl"},
81 {"topkv2_store", "topkv2.cl"},
82 {"radixsort_histogram", "topkv2_radixsort.cl"},
83 {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
84 {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
85 {"radixsort_reorder", "topkv2_radixsort.cl"},
86 {"topkv2_quicksort", "topkv2_quicksort.cl"},
87 {"scale_factor_symm8", "scale_factor.cl"},
90 const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
91 #ifdef EMBEDDED_KERNELS
94 #include "./cl_kernels/arg_min_max_ex.clembed"
98 #include "./cl_kernels/cast.clembed"
101 "embedding_lookup.cl",
102 #include "./cl_kernels/embedding_lookup.clembed"
106 #include "./cl_kernels/gather_ex.clembed"
110 #include "./cl_kernels/gemmlowp_ex.clembed"
113 "hashtable_lookup.cl",
114 #include "./cl_kernels/hashtable_lookup.clembed"
118 #include "./cl_kernels/helpers.hembed"
122 #include "./cl_kernels/helpers_asymm.hembed"
125 "instance_normalization_ex.cl",
126 #include "./cl_kernels/instance_normalization_ex.clembed"
129 "binary_logical_op.cl",
130 #include "./cl_kernels/binary_logical_op.clembed"
133 "multiply_scale_factor.cl",
134 #include "./cl_kernels/multiply_scale_factor.clembed"
138 #include "./cl_kernels/neg_tensor.clembed"
142 #include "./cl_kernels/one_hot.clembed"
145 "quantization_symm8.cl",
146 #include "./cl_kernels/quantization_symm8.clembed"
149 "reduce_operation.cl",
150 #include "./cl_kernels/reduce_operation.clembed"
154 #include "./cl_kernels/scale_factor.clembed"
158 #include "./cl_kernels/topkv2.clembed"
161 "topkv2_radixsort.cl",
162 #include "./cl_kernels/topkv2_radixsort.clembed"
165 "topkv2_quicksort.cl",
166 #include "./cl_kernels/topkv2_quicksort.clembed"
169 #endif /* EMBEDDED_KERNELS */
172 CLKernelLibraryEx::CLKernelLibraryEx()
173 : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
175 opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the
176 // CLKernelLibraryEx is built
179 CLKernelLibraryEx &CLKernelLibraryEx::get()
181 static CLKernelLibraryEx _kernel_library;
182 return _kernel_library;
185 Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name,
186 const StringSet &build_options_set) const
188 // Find which program contains the kernel
189 auto kernel_program_it = _kernel_program_map.find(kernel_name);
191 if (_kernel_program_map.end() == kernel_program_it)
193 ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
195 std::string concat_str;
197 if (fp16_supported())
199 concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
202 if (get_cl_version(_device) == CLVersion::CL20)
204 concat_str += " -cl-std=CL2.0 ";
206 else if (arm_non_uniform_workgroup_supported(_device))
208 concat_str += " -cl-arm-non-uniform-work-group-size ";
212 ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
215 // Check if the program has been built before with same build options.
216 const std::string program_name = kernel_program_it->second;
217 const std::string build_options = stringify_set(build_options_set) + concat_str;
219 const std::string built_program_name = program_name + "_" + build_options;
220 auto built_program_it = _built_programs_map.find(built_program_name);
222 cl::Program cl_program;
224 if (_built_programs_map.end() != built_program_it)
226 // If program has been built, retrieve to create kernel from it
227 cl_program = built_program_it->second;
232 Program program = load_program(program_name);
235 cl_program = program.build(build_options);
237 // Add built program to internal map
238 _built_programs_map.emplace(built_program_name, cl_program);
241 // Create and return kernel
242 return Kernel(kernel_name, cl_program);
245 void CLKernelLibraryEx::add_built_program(const std::string &built_program_name,
248 _built_programs_map.emplace(built_program_name, program);
251 bool CLKernelLibraryEx::fp16_supported() const { return ::fp16_supported(_device); }
253 bool CLKernelLibraryEx::int64_base_atomics_supported() const
255 return device_supports_extension(_device, "cl_khr_int64_base_atomics");
258 const Program &CLKernelLibraryEx::load_program(const std::string &program_name) const
260 const auto program_it = _programs_map.find(program_name);
262 if (program_it != _programs_map.end())
264 return program_it->second;
269 #ifdef EMBEDDED_KERNELS
270 const auto program_source_it = _program_source_map.find(program_name);
272 if (_program_source_map.end() == program_source_it)
274 ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
277 program = Program(_context, program_name, program_source_it->second);
278 #else /* EMBEDDED_KERNELS */
280 std::string source_name = _kernel_path + program_name;
281 std::string binary_name = source_name + "bin";
283 if (std::ifstream(binary_name).is_open())
285 const std::string program_binary = read_file(binary_name, true);
286 program = Program(_context, _device, program_name,
287 std::vector<unsigned char>(program_binary.begin(), program_binary.end()));
289 else if (std::ifstream(source_name).is_open())
291 program = Program(_context, program_name, read_file(source_name, false));
295 ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
297 #endif /* EMBEDDED_KERNELS */
299 // Insert program to program map
300 const auto new_program = _programs_map.emplace(program_name, std::move(program));
302 return new_program.first->second;
305 std::string CLKernelLibraryEx::stringify_set(const StringSet &s) const
307 std::string concat_set;
309 #ifndef EMBEDDED_KERNELS
310 concat_set += "-I" + _kernel_path + " ";
311 #endif /* EMBEDDED_KERNELS */
314 for (const auto &el : s)
316 concat_set += " " + el;
322 std::string CLKernelLibraryEx::get_program_source(const std::string &program_name)
324 const auto program_source_it = _program_source_map.find(program_name);
326 if (program_source_it == _program_source_map.end())
328 ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
331 return program_source_it->second;
334 size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) const
338 size_t err = kernel.getWorkGroupInfo(_device, CL_KERNEL_WORK_GROUP_SIZE, &result);
339 ARM_COMPUTE_ERROR_ON_MSG(
341 "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
342 ARM_COMPUTE_UNUSED(err);
347 cl::NDRange CLKernelLibraryEx::default_ndrange() const
349 // GPUTarget _target = get_target_from_device(_device);
350 cl::Device device = cl::Device::getDefault();
351 GPUTarget _target = get_target_from_device(device);
352 cl::NDRange default_range;
356 case GPUTarget::MIDGARD:
357 case GPUTarget::T600:
358 case GPUTarget::T700:
359 case GPUTarget::T800:
360 default_range = cl::NDRange(128u, 1);
363 default_range = cl::NullRange;
366 return default_range;
369 std::string CLKernelLibraryEx::get_device_version() { return _device.getInfo<CL_DEVICE_VERSION>(); }