2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
16 #include "engine_info.h"
17 #include "ocl_toolkit.h"
18 #include <unordered_map>
24 #include "istreamwrapper.h"
27 #define WIN32_LEAN_AND_MEAN
44 namespace cldnn { namespace gpu{
// Text of the std::runtime_error thrown by the engine_info_internal constructor
// when get_gpu_device_id() yields 0. get_cache_from_file() additionally passes
// this pointer to dladdr() as the address to look up.
48 const char* device_info_failed_msg = "Device lookup failed";
// Determines a GPU device id by looking for a display device whose vendor id
// is 0x8086 (presumably Intel's PCI vendor id) and returning its device id.
// Two code paths are visible: one built on the SetupDi* device-enumeration
// calls and, after the `#elif defined(__linux__)` line, one that reads sysfs.
// The only caller visible in this file treats a result of 0 as "lookup failed".
// NOTE(review): several short lines (braces, the opening platform guard, the
// declarations of `result`, `buf` and `ven_id`, and the return/continue/break
// statements) are not visible in this listing — confirm against the full file.
50 int get_gpu_device_id()
// Obtain the set of display-class devices that are currently present.
56 HDEVINFO device_info_set = SetupDiGetClassDevsA(&GUID_DEVCLASS_DISPLAY, NULL, NULL, DIGCF_PRESENT);
// NOTE(review): the statement executed on an invalid handle is not visible here.
57 if (device_info_set == INVALID_HANDLE_VALUE)
// Zero the per-device structure and record its own size in cbSize before it is
// handed to the enumeration call.
60 SP_DEVINFO_DATA devinfo_data;
61 std::memset(&devinfo_data, 0, sizeof(devinfo_data));
62 devinfo_data.cbSize = sizeof(devinfo_data);
// Visit devices by increasing index until SetupDiEnumDeviceInfo reports failure.
64 for (DWORD dev_idx = 0; SetupDiEnumDeviceInfo(device_info_set, dev_idx, &devinfo_data); dev_idx++)
66 const size_t buf_size = 512;
// Fetch the device instance id string into `buf` (presumably a char array of
// buf_size elements declared on a line that is not visible here).
68 if (!SetupDiGetDeviceInstanceIdA(device_info_set, &devinfo_data, buf, buf_size, NULL))
// Find the "VEN_" tag and parse the hexadecimal digits that follow its four
// characters; only vendor 0x8086 is accepted.
73 char* vendor_pos = std::strstr(buf, "VEN_");
74 if (vendor_pos != NULL && std::stoi(vendor_pos + 4, NULL, 16) == 0x8086)
// The "DEV_" tag is searched for starting at the vendor tag, and the hex digits
// after it become the result.
76 char* device_pos = strstr(vendor_pos, "DEV_");
77 if (device_pos != NULL)
79 result = std::stoi(device_pos + 4, NULL, 16);
// Release the device information set obtained from SetupDiGetClassDevsA.
87 SetupDiDestroyDeviceInfoList(device_info_set);
90 #elif defined(__linux__)
// Fixed sysfs directory of PCI function 0000:00:02.0; its "vendor" and "device"
// files are read as hexadecimal numbers.
92 std::string dev_base{ "/sys/devices/pci0000:00/0000:00:02.0/" };
93 std::ifstream ifs(dev_base + "vendor");
97 ifs >> std::hex >> ven_id;
// NOTE(review): the check applied to ven_id before the device file is opened is
// not visible here — presumably the same 0x8086 comparison as above.
101 ifs.open(dev_base + "device");
104 ifs >> std::hex >> result;
// Formats an integer as an upper-case hexadecimal string with a "0x" prefix.
//
// The value is reinterpreted as unsigned int first, so negative inputs are
// rendered as their two's-complement bit pattern (e.g. -1 -> "0xFFFFFFFF").
// No leading zeros are emitted; zero itself is rendered as "0x0".
//
// @param val  Value to format.
// @return     "0x" followed by the minimal number of upper-case hex digits.
std::string to_string_hex(int val)
{
    auto remaining = static_cast<unsigned int>(val);
    if (remaining == 0) return "0x0";

    const char* hex_chars = "0123456789ABCDEF";

    // 16 digit slots plus the terminating NUL; digits are written right to
    // left, so &buf[i] is always a NUL-terminated string of the digits so far.
    char buf[] = "0000000000000000";
    size_t i = sizeof(buf) / sizeof(buf[0]) - 1;
    while (i > 0 && remaining > 0)
    {
        buf[--i] = hex_chars[remaining & 0xF];
        // Advance to the next nibble; without this every digit would repeat
        // the lowest nibble and the loop would fill all 16 slots.
        remaining >>= 4;
    }

    return std::string("0x") + &buf[i];
}
// Reads tuning data from a JSON cache file and returns it as a newly created
// rapidjson::Document held by a shared_ptr.
//   compute_units_count - converted to a decimal string and used as the
//                         top-level member name to look up in the file.
//   context             - provides the configured tuning_cache_path.
// NOTE(review): the branch structure (braces, else lines, platform guards) and
// a few short statements are not visible in this listing; the comments below
// describe the visible statements only — confirm against the full file.
135 std::shared_ptr<rapidjson::Document> get_cache_from_file(uint32_t compute_units_count, const gpu_toolkit& context) {
136 std::string tuning_cache_path = context.get_configuration().tuning_cache_path;
// A configured path of exactly "cache.json" is replaced by a path next to a
// loaded binary; two variants of that computation follow.
137 if (tuning_cache_path.compare("cache.json") == 0)
// Variant using Win32 calls: find the module that contains this function
// (without changing its reference count), get that module's file name and
// replace everything after the last backslash with "cache.json".
142 GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
143 GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
144 (LPCSTR)&get_cache_from_file, &hm);
145 GetModuleFileName(hm, path, sizeof(path));
146 std::string bin_path(path);
147 tuning_cache_path = bin_path.substr(0, bin_path.find_last_of("\\")) + "\\cache.json";
// Variant using dladdr: look up the object that contains the address stored in
// device_info_failed_msg and build the path the same way with '/' separators.
150 dladdr((void*)device_info_failed_msg, &dl_info);
151 std::string bin_path(dl_info.dli_fname);
152 tuning_cache_path = bin_path.substr(0, bin_path.find_last_of("/")) + "/cache.json";
// cacheFile receives the parsed file; cacheDeviceData is the document returned.
155 rapidjson::Document cacheFile;
156 rapidjson::Document cacheDeviceData;
157 auto computeUnits = std::to_string(compute_units_count);
158 std::ifstream f(tuning_cache_path);
// Parse the file stream and keep the parse status for the checks below.
161 rapidjson::IStreamWrapper isw{ f };
162 cacheFile.ParseStream(isw);
163 auto errorCode = cacheFile.GetParseError();
// Case: parsing succeeded but the file has no member for this compute-unit
// count. NOTE(review): the statement executed in this case is not visible here.
164 if (!cacheFile.HasMember(computeUnits.c_str()) && errorCode == 0)
// Case: parsing succeeded and the member exists — deep-copy it into the result
// using the result document's own allocator.
168 if (cacheFile.HasMember(computeUnits.c_str()) && errorCode == 0)
170 cacheDeviceData.CopyFrom(cacheFile[computeUnits.c_str()], cacheDeviceData.GetAllocator());
// Fallbacks that set the result to an empty JSON object. NOTE(review): two
// appear; presumably one covers a missing member / parse error and the other
// an unreadable file — confirm.
174 cacheDeviceData.Parse("{}");
179 cacheDeviceData.Parse("{}");
// Move the result document into a shared_ptr for the caller.
181 return std::make_shared < rapidjson::Document>(std::move(cacheDeviceData));
184 } // namespace <anonymous>
// Fills the engine_info_internal fields from three sources: the GPU device id
// reported by get_gpu_device_id(), the tuning cache loaded through
// get_cache_from_file(), and OpenCL device queries on context.device().
// Throws std::runtime_error(device_info_failed_msg) when the device id is 0.
186 engine_info_internal::engine_info_internal(const gpu_toolkit& context)
188 auto device_id = get_gpu_device_id();
189 if (0 == device_id) throw std::runtime_error(device_info_failed_msg);
// dev_id stores the id as a "0x"-prefixed hexadecimal string.
190 dev_id = to_string_hex(device_id);
191 driver_version = context.device().getInfo<CL_DRIVER_VERSION>();
193 compute_units_count = context.device().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
// Load the tuning data that corresponds to this compute-unit count.
195 device_cache = get_cache_from_file(compute_units_count, context);
// NOTE(review): the control flow around the next two statements is not visible
// in this listing. If they run because the assignment above did not complete,
// device_cache may not yet refer to a Document when ->Parse is called — confirm.
198 std::cout << "[WARNING] error during parsing cache file, tuning data won't be used" << std::endl;
199 device_cache->Parse("{}");
// cores_count comes from the same CL_DEVICE_MAX_COMPUTE_UNITS query as
// compute_units_count above.
201 cores_count = static_cast<uint32_t>(context.device().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>());
202 core_frequency = static_cast<uint32_t>(context.device().getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>());
204 max_work_group_size = static_cast<uint64_t>(context.device().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
// Cap the stored work-group size at 256 regardless of what the device reports.
206 if (max_work_group_size > 256)
207 max_work_group_size = 256;
209 max_local_mem_size = static_cast<uint64_t>(context.device().getInfo<CL_DEVICE_LOCAL_MEM_SIZE>());
210 max_global_mem_size = static_cast<uint64_t>(context.device().getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>());
211 max_alloc_mem_size = static_cast<uint64_t>(context.device().getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>());
213 supports_image = static_cast<uint8_t>(context.device().getInfo<CL_DEVICE_IMAGE_SUPPORT>());
214 max_image2d_width = static_cast<uint64_t>(context.device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>());
215 max_image2d_height = static_cast<uint64_t>(context.device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>());
217 // Check for supported features.
218 auto extensions = context.device().getInfo<CL_DEVICE_EXTENSIONS>();
219 extensions.push_back(' '); // Add trailing space to ease searching (search with keyword with trailing space).
221 supports_fp16 = extensions.find("cl_khr_fp16 ") != std::string::npos;
// The half-precision config is only queried when the fp16 extension was found
// (short-circuit on supports_fp16).
222 supports_fp16_denorms = supports_fp16 && (context.device().getInfo<CL_DEVICE_HALF_FP_CONFIG>() & CL_FP_DENORM) != 0;
// NOTE(review): unlike the fp16 lookup, this keyword has no trailing space, so
// any extension name beginning with "cl_intel_subgroups_short" matches —
// confirm that this is intended.
224 supports_subgroups_short = extensions.find("cl_intel_subgroups_short") != std::string::npos;
// imad / immad support is derived from the numeric device id by helpers that
// are defined outside this listing.
226 supports_imad = is_imad_supported(device_id);
227 supports_immad = is_immad_supported(device_id);