2 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Copyright (c) 2016-2018 ARM Limited.
20 * SPDX-License-Identifier: MIT
22 * Permission is hereby granted, free of charge, to any person obtaining a copy
23 * of this software and associated documentation files (the "Software"), to
24 * deal in the Software without restriction, including without limitation the
25 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
26 * sell copies of the Software, and to permit persons to whom the Software is
27 * furnished to do so, subject to the following conditions:
29 * The above copyright notice and this permission notice shall be included in all
30 * copies or substantial portions of the Software.
32 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41 #include "arm_compute/core/CL/CLKernelLibrary.h"
42 #include "arm_compute/core/CL/CLKernelLibraryEx.h"
44 #include "arm_compute/core/CL/CLHelpers.h"
45 #include "arm_compute/core/Error.h"
46 #include "arm_compute/core/Utils.h"
54 using namespace arm_compute;
56 const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = {
57 // ARMComputeEx kernels
58 {"binary_logical_op", "binary_logical_op.cl"},
59 {"embedding_lookup", "embedding_lookup.cl"},
60 {"gather_ex", "gather_ex.cl"},
61 {"gather_ex_1d", "gather_ex.cl"},
62 {"gather_ex_1d_out", "gather_ex.cl"},
63 {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
64 {"hashtable_lookup", "hashtable_lookup.cl"},
65 {"instance_normalization_ex", "instance_normalization_ex.cl"},
66 {"multiply_scale_factor", "multiply_scale_factor.cl"},
67 {"neg_tensor", "neg_tensor.cl"},
68 {"quantization_symm8", "quantization_symm8.cl"},
69 {"reduce_min_max", "reduce_operation.cl"},
70 {"reduce_sum_mean", "reduce_operation.cl"},
71 {"topkv2_init", "topkv2.cl"},
72 {"topkv2_find_first_negative", "topkv2.cl"},
73 {"topkv2_reorder_negatives", "topkv2.cl"},
74 {"topkv2_store", "topkv2.cl"},
75 {"radixsort_histogram", "topkv2_radixsort.cl"},
76 {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
77 {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
78 {"radixsort_reorder", "topkv2_radixsort.cl"},
79 {"topkv2_quicksort", "topkv2_quicksort.cl"},
80 {"scale_factor_symm8", "scale_factor.cl"},
83 const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
84 #ifdef EMBEDDED_KERNELS
86 "embedding_lookup.cl",
87 #include "./cl_kernels/embedding_lookup.clembed"
91 #include "./cl_kernels/gather_ex.clembed"
95 #include "./cl_kernels/gemmlowp_ex.clembed"
98 "hashtable_lookup.cl",
99 #include "./cl_kernels/hashtable_lookup.clembed"
103 #include "./cl_kernels/helpers.hembed"
107 #include "./cl_kernels/helpers_asymm.hembed"
110 "instance_normalization_ex.cl",
111 #include "./cl_kernels/instance_normalization_ex.clembed"
114 "binary_logical_op.cl",
115 #include "./cl_kernels/binary_logical_op.clembed"
118 "multiply_scale_factor.cl",
119 #include "./cl_kernels/multiply_scale_factor.clembed"
123 #include "./cl_kernels/neg_tensor.clembed"
126 "quantization_symm8.cl",
127 #include "./cl_kernels/quantization_symm8.clembed"
130 "reduce_operation.cl",
131 #include "./cl_kernels/reduce_operation.clembed"
135 #include "./cl_kernels/scale_factor.clembed"
139 #include "./cl_kernels/topkv2.clembed"
142 "topkv2_radixsort.cl",
143 #include "./cl_kernels/topkv2_radixsort.clembed"
146 "topkv2_quicksort.cl",
147 #include "./cl_kernels/topkv2_quicksort.clembed"
150 #endif /* EMBEDDED_KERNELS */
153 CLKernelLibraryEx::CLKernelLibraryEx()
154 : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
156 opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the
157 // CLKernelLibraryEx is built
160 CLKernelLibraryEx &CLKernelLibraryEx::get()
162 static CLKernelLibraryEx _kernel_library;
163 return _kernel_library;
166 Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name,
167 const StringSet &build_options_set) const
169 // Find which program contains the kernel
170 auto kernel_program_it = _kernel_program_map.find(kernel_name);
172 if (_kernel_program_map.end() == kernel_program_it)
174 ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
176 std::string concat_str;
178 if (fp16_supported())
180 concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
183 if (get_cl_version(_device) == CLVersion::CL20)
185 concat_str += " -cl-std=CL2.0 ";
187 else if (arm_non_uniform_workgroup_supported(_device))
189 concat_str += " -cl-arm-non-uniform-work-group-size ";
193 ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
196 // Check if the program has been built before with same build options.
197 const std::string program_name = kernel_program_it->second;
198 const std::string build_options = stringify_set(build_options_set) + concat_str;
200 const std::string built_program_name = program_name + "_" + build_options;
201 auto built_program_it = _built_programs_map.find(built_program_name);
203 cl::Program cl_program;
205 if (_built_programs_map.end() != built_program_it)
207 // If program has been built, retrieve to create kernel from it
208 cl_program = built_program_it->second;
213 Program program = load_program(program_name);
216 cl_program = program.build(build_options);
218 // Add built program to internal map
219 _built_programs_map.emplace(built_program_name, cl_program);
222 // Create and return kernel
223 return Kernel(kernel_name, cl_program);
226 void CLKernelLibraryEx::add_built_program(const std::string &built_program_name,
229 _built_programs_map.emplace(built_program_name, program);
232 bool CLKernelLibraryEx::fp16_supported() const { return ::fp16_supported(_device); }
234 bool CLKernelLibraryEx::int64_base_atomics_supported() const
236 return device_supports_extension(_device, "cl_khr_int64_base_atomics");
239 const Program &CLKernelLibraryEx::load_program(const std::string &program_name) const
241 const auto program_it = _programs_map.find(program_name);
243 if (program_it != _programs_map.end())
245 return program_it->second;
250 #ifdef EMBEDDED_KERNELS
251 const auto program_source_it = _program_source_map.find(program_name);
253 if (_program_source_map.end() == program_source_it)
255 ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
258 program = Program(_context, program_name, program_source_it->second);
259 #else /* EMBEDDED_KERNELS */
261 std::string source_name = _kernel_path + program_name;
262 std::string binary_name = source_name + "bin";
264 if (std::ifstream(binary_name).is_open())
266 const std::string program_binary = read_file(binary_name, true);
267 program = Program(_context, _device, program_name,
268 std::vector<unsigned char>(program_binary.begin(), program_binary.end()));
270 else if (std::ifstream(source_name).is_open())
272 program = Program(_context, program_name, read_file(source_name, false));
276 ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
278 #endif /* EMBEDDED_KERNELS */
280 // Insert program to program map
281 const auto new_program = _programs_map.emplace(program_name, std::move(program));
283 return new_program.first->second;
286 std::string CLKernelLibraryEx::stringify_set(const StringSet &s) const
288 std::string concat_set;
290 #ifndef EMBEDDED_KERNELS
291 concat_set += "-I" + _kernel_path + " ";
292 #endif /* EMBEDDED_KERNELS */
295 for (const auto &el : s)
297 concat_set += " " + el;
303 std::string CLKernelLibraryEx::get_program_source(const std::string &program_name)
305 const auto program_source_it = _program_source_map.find(program_name);
307 if (program_source_it == _program_source_map.end())
309 ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
312 return program_source_it->second;
315 size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) const
319 size_t err = kernel.getWorkGroupInfo(_device, CL_KERNEL_WORK_GROUP_SIZE, &result);
320 ARM_COMPUTE_ERROR_ON_MSG(
322 "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
323 ARM_COMPUTE_UNUSED(err);
328 cl::NDRange CLKernelLibraryEx::default_ndrange() const
330 // GPUTarget _target = get_target_from_device(_device);
331 cl::Device device = cl::Device::getDefault();
332 GPUTarget _target = get_target_from_device(device);
333 cl::NDRange default_range;
337 case GPUTarget::MIDGARD:
338 case GPUTarget::T600:
339 case GPUTarget::T700:
340 case GPUTarget::T800:
341 default_range = cl::NDRange(128u, 1);
344 default_range = cl::NullRange;
347 return default_range;
350 std::string CLKernelLibraryEx::get_device_version() { return _device.getInfo<CL_DEVICE_VERSION>(); }