From f638e8e35de6a862691adac6e6f10d7fafe7f9da Mon Sep 17 00:00:00 2001
From: Mike Stroyan
Date: Wed, 13 Jul 2016 10:10:25 -0600
Subject: [PATCH] layers: faster threading layer for single thread

Add a check to the threading layer to detect a single thread case.
If the application has only called vulkan from one thread at a time,
then skip access counters for externally synchronized parameters.
This greatly reduces the overhead of the layer for applications
that don't truly use vulkan in a multi-threaded way.
---
 generator.py         | 17 +++++++---
 layers/threading.cpp | 90 ++++++++++++++++++++++++++++++++++++++--------------
 layers/threading.h   | 20 ++++++++++++
 3 files changed, 98 insertions(+), 29 deletions(-)

diff --git a/generator.py b/generator.py
index 0b97305..dbd393d 100755
--- a/generator.py
+++ b/generator.py
@@ -2573,9 +2573,9 @@ class ThreadOutputGenerator(OutputGenerator):
                     limit = element[0:element.find('s[]')] + 'Count'
                     dotp = limit.rfind('.p')
                     limit = limit[0:dotp+1] + limit[dotp+2:dotp+3].lower() + limit[dotp+3:]
-                    paramdecl += '        for(uint32_t index2=0;index2<'+limit+';index2++)'
+                    paramdecl += '        for(uint32_t index2=0;index2<'+limit+';index2++)\n'
                     element = element.replace('[]','[index2]')
-                    paramdecl += '        ' + functionprefix + 'WriteObject(my_data, ' + element + ');\n'
+                    paramdecl += '            ' + functionprefix + 'WriteObject(my_data, ' + element + ');\n'
                 paramdecl += '    }\n'
             else:
                 # externsync can list members to synchronize
@@ -2601,7 +2601,7 @@ class ThreadOutputGenerator(OutputGenerator):
         for param in explicitexternsyncparams:
             externsyncattrib = param.attrib.get('externsync')
             paramname = param.find('name')
-            paramdecl += '// Host access to '
+            paramdecl += '    // Host access to '
             if externsyncattrib == 'true':
                 if self.paramIsArray(param):
                     paramdecl += 'each member of ' + paramname.text
@@ -2810,12 +2810,19 @@ class ThreadOutputGenerator(OutputGenerator):
         else:
             assignresult = ''
-        self.appendSection('command', str(startthreadsafety))
+        self.appendSection('command', '    bool threadChecks = startMultiThread();')
+        self.appendSection('command', '    if (threadChecks) {')
+        self.appendSection('command', "    "+"\n    ".join(str(startthreadsafety).rstrip().split("\n")))
+        self.appendSection('command', '    }')
         params = cmdinfo.elem.findall('param/name')
         paramstext = ','.join([str(param.text) for param in params])
         API = cmdinfo.elem.attrib.get('name').replace('vk','pTable->',1)
         self.appendSection('command', '    ' + assignresult + API + '(' + paramstext + ');')
-        self.appendSection('command', str(finishthreadsafety))
+        self.appendSection('command', '    if (threadChecks) {')
+        self.appendSection('command', "    "+"\n    ".join(str(finishthreadsafety).rstrip().split("\n")))
+        self.appendSection('command', '    } else {')
+        self.appendSection('command', '        finishMultiThread();')
+        self.appendSection('command', '    }')
         # Return result variable, if any.
         if (resulttype != None):
             self.appendSection('command', '    return result;')
diff --git a/layers/threading.cpp b/layers/threading.cpp
index d98aa1c..ed7663a 100644
--- a/layers/threading.cpp
+++ b/layers/threading.cpp
@@ -94,9 +94,16 @@ VKAPI_ATTR void VKAPI_CALL DestroyInstance(VkInstance instance, const VkAllocati
         }
     }

-    startWriteObject(my_data, instance);
+    bool threadChecks = startMultiThread();
+    if (threadChecks) {
+        startWriteObject(my_data, instance);
+    }
     pTable->DestroyInstance(instance, pAllocator);
-    finishWriteObject(my_data, instance);
+    if (threadChecks) {
+        finishWriteObject(my_data, instance);
+    } else {
+        finishMultiThread();
+    }

     // Disable and cleanup the temporary callback(s):
     if (callback_setup) {
@@ -153,9 +160,16 @@ VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(VkPhysicalDevice gpu, const VkDevice
 VKAPI_ATTR void VKAPI_CALL DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) {
     dispatch_key key = get_dispatch_key(device);
     layer_data *dev_data = get_my_data_ptr(key, layer_data_map);
-    startWriteObject(dev_data, device);
+    bool threadChecks = startMultiThread();
+    if (threadChecks) {
+        startWriteObject(dev_data, device);
+    }
     dev_data->device_dispatch_table->DestroyDevice(device, pAllocator);
-    finishWriteObject(dev_data, device);
+    if (threadChecks) {
+        finishWriteObject(dev_data, device);
+    } else {
+        finishMultiThread();
+    }
     layer_data_map.erase(key);
 }

@@ -281,25 +295,39 @@
 VKAPI_ATTR VkResult VKAPI_CALL
 CreateDebugReportCallbackEXT(VkInstance instance, const VkDebugReportCallbackCreateInfoEXT *pCreateInfo,
                              const VkAllocationCallbacks *pAllocator, VkDebugReportCallbackEXT *pMsgCallback) {
     layer_data *my_data = get_my_data_ptr(get_dispatch_key(instance), layer_data_map);
-    startReadObject(my_data, instance);
+    bool threadChecks = startMultiThread();
+    if (threadChecks) {
+        startReadObject(my_data, instance);
+    }
     VkResult result = my_data->instance_dispatch_table->CreateDebugReportCallbackEXT(instance, pCreateInfo, pAllocator, pMsgCallback);
     if (VK_SUCCESS == result) {
         result = layer_create_msg_callback(my_data->report_data, false, pCreateInfo, pAllocator, pMsgCallback);
     }
-    finishReadObject(my_data, instance);
+    if (threadChecks) {
+        finishReadObject(my_data, instance);
+    } else {
+        finishMultiThread();
+    }
     return result;
 }

 VKAPI_ATTR void VKAPI_CALL DestroyDebugReportCallbackEXT(VkInstance instance, VkDebugReportCallbackEXT callback,
                                                          const VkAllocationCallbacks *pAllocator) {
     layer_data *my_data = get_my_data_ptr(get_dispatch_key(instance), layer_data_map);
-    startReadObject(my_data, instance);
-    startWriteObject(my_data, callback);
+    bool threadChecks = startMultiThread();
+    if (threadChecks) {
+        startReadObject(my_data, instance);
+        startWriteObject(my_data, callback);
+    }
     my_data->instance_dispatch_table->DestroyDebugReportCallbackEXT(instance, callback, pAllocator);
     layer_destroy_msg_callback(my_data->report_data, callback, pAllocator);
-    finishReadObject(my_data, instance);
-    finishWriteObject(my_data, callback);
+    if (threadChecks) {
+        finishReadObject(my_data, instance);
+        finishWriteObject(my_data, callback);
+    } else {
+        finishMultiThread();
+    }
 }

 VKAPI_ATTR VkResult VKAPI_CALL
@@ -308,12 +336,19 @@ AllocateCommandBuffers(VkDevice device, const VkCommandBufferAllocateInfo *pAllo
     layer_data *my_data = get_my_data_ptr(key, layer_data_map);
     VkLayerDispatchTable *pTable = my_data->device_dispatch_table;
     VkResult result;
-    startReadObject(my_data, device);
-    startWriteObject(my_data, pAllocateInfo->commandPool);
+    bool threadChecks = startMultiThread();
+    if (threadChecks) {
+        startReadObject(my_data, device);
+        startWriteObject(my_data, pAllocateInfo->commandPool);
+    }
     result = pTable->AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers);
-    finishReadObject(my_data, device);
-    finishWriteObject(my_data, pAllocateInfo->commandPool);
+    if (threadChecks) {
+        finishReadObject(my_data, device);
+        finishWriteObject(my_data, pAllocateInfo->commandPool);
+    } else {
+        finishMultiThread();
+    }

     // Record mapping from command buffer to command pool
     if (VK_SUCCESS == result) {
@@ -332,19 +367,26 @@ VKAPI_ATTR void VKAPI_CALL FreeCommandBuffers(VkDevice device, VkCommandPool com
     layer_data *my_data = get_my_data_ptr(key, layer_data_map);
     VkLayerDispatchTable *pTable = my_data->device_dispatch_table;
     const bool lockCommandPool = false; // pool is already directly locked
-    startReadObject(my_data, device);
-    startWriteObject(my_data, commandPool);
-    for (uint32_t index = 0; index < commandBufferCount; index++) {
-        startWriteObject(my_data, pCommandBuffers[index], lockCommandPool);
+    bool threadChecks = startMultiThread();
+    if (threadChecks) {
+        startReadObject(my_data, device);
+        startWriteObject(my_data, commandPool);
+        for (uint32_t index = 0; index < commandBufferCount; index++) {
+            startWriteObject(my_data, pCommandBuffers[index], lockCommandPool);
+        }
     }
     pTable->FreeCommandBuffers(device, commandPool, commandBufferCount, pCommandBuffers);
-    finishReadObject(my_data, device);
-    finishWriteObject(my_data, commandPool);
-    for (uint32_t index = 0; index < commandBufferCount; index++) {
-        finishWriteObject(my_data, pCommandBuffers[index], lockCommandPool);
-        std::lock_guard<std::mutex> lock(command_pool_lock);
-        command_pool_map.erase(pCommandBuffers[index]);
+    if (threadChecks) {
+        finishReadObject(my_data, device);
+        finishWriteObject(my_data, commandPool);
+        for (uint32_t index = 0; index < commandBufferCount; index++) {
+            finishWriteObject(my_data, pCommandBuffers[index], lockCommandPool);
+            std::lock_guard<std::mutex> lock(command_pool_lock);
+            command_pool_map.erase(pCommandBuffers[index]);
+        }
+    } else {
+        finishMultiThread();
     }
 }
diff --git a/layers/threading.h b/layers/threading.h
index 311ce7b..1d0924d 100644
--- a/layers/threading.h
+++ b/layers/threading.h
@@ -48,6 +48,26 @@ struct object_use_data {

 struct layer_data;

+namespace threading {
+volatile bool vulkan_in_use = false;
+volatile bool vulkan_multi_threaded = false;
+// starting check if an application is using vulkan from multiple threads.
+inline bool startMultiThread() {
+    if (vulkan_multi_threaded) {
+        return true;
+    }
+    if (vulkan_in_use) {
+        vulkan_multi_threaded = true;
+        return true;
+    }
+    vulkan_in_use = true;
+    return false;
+}
+
+// finishing check if an application is using vulkan from multiple threads.
+inline void finishMultiThread() { vulkan_in_use = false; }
+} // namespace threading
+
 template <typename T> class counter {
 public:
     const char *typeName;
-- 
2.7.4
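
Note on the detection scheme: the startMultiThread()/finishMultiThread() pair in threading.h latches into multi-threaded mode the first time two threads overlap inside an intercepted call, and from then on every call pays for the full per-object access counters. The patch implements the latch with plain volatile bools, which is cheap but is not a synchronization primitive in standard C++. The standalone sketch below is not part of the patch: it rebuilds the same latch on std::atomic, where exchange() also closes the small window in which two overlapping first calls could both take the fast path. InterceptedCall() and its printouts are hypothetical stand-ins for a generated layer entry point.

// sketch_single_thread_latch.cpp -- illustrative sketch only, not the layer's code.
#include <atomic>
#include <cstdio>
#include <thread>

namespace threading_sketch {

std::atomic<bool> vulkan_in_use{false};
std::atomic<bool> vulkan_multi_threaded{false};

// Returns true once two threads have ever overlapped inside an API call;
// after that, every call takes the full-checking path.
inline bool startMultiThread() {
    if (vulkan_multi_threaded.load(std::memory_order_relaxed)) {
        return true;
    }
    // exchange() returns the previous value: true means another thread is
    // already inside a call, so latch multi-threaded mode permanently.
    if (vulkan_in_use.exchange(true, std::memory_order_acq_rel)) {
        vulkan_multi_threaded.store(true, std::memory_order_relaxed);
        return true;
    }
    return false; // sole thread in flight: fast path, counters skipped
}

// Only a thread that took the fast path releases the in-use flag.
inline void finishMultiThread() { vulkan_in_use.store(false, std::memory_order_release); }

} // namespace threading_sketch

// Hypothetical stand-in for a generated layer entry point.
void InterceptedCall() {
    bool threadChecks = threading_sketch::startMultiThread();
    if (threadChecks) {
        std::puts("multi-threaded: per-object access counters would run here");
        // startWriteObject(...); dispatch; finishWriteObject(...);
    } else {
        std::puts("single-threaded fast path: counters skipped");
        // dispatch only
        threading_sketch::finishMultiThread();
    }
}

int main() {
    InterceptedCall();                    // one thread at a time: fast path
    std::thread second(InterceptedCall);  // latches only if calls overlap
    second.join();
    InterceptedCall();                    // still fast path if no overlap occurred
    return 0;
}

As in the patch, a call that sees threadChecks == true never calls finishMultiThread(): once the multi-threaded flag latches, the in-use flag no longer matters and the counters stay enabled for the life of the process, which matches the commit message's claim that only applications using vulkan from one thread at a time keep the cheap path.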