layers: MR153, Correctly track inFlight cmdBuffers per queue
authorTobin Ehlis <tobine@google.com>
Wed, 20 Jan 2016 23:23:37 +0000 (16:23 -0700)
committerMark Lobodzinski <mark@lunarg.com>
Fri, 22 Jan 2016 22:26:35 +0000 (15:26 -0700)
There were per-queue vectors of the cmdBuffers in flight, but they were not
being consistently used/updated.
This change keeps two sets of inFlight cmdBuffers: one globally for all
cmdBuffers on the device, and one for each queue.
Ideally we could just track per-queue, but secondary command buffers are an
exception that's considered "in-flight" from the time they're recorded in a
primary command buffer with the vkCmdExecuteCommands() call so having a global
list provides a way to account for that.
Command buffers are added into both the global inFlight set and the individual
queue set at QueueSubmit time.
When cleaning up command buffers based on fences or Idle waits, correctly
remove the cmdBuffers from the appropriate queue inFlight set. Also, if the
cmdBuffer is not inFlight on any other queues, remove it from the the global
inFlight set.
Removed the deviceMap as it was only being used to hold a vector of queues.
Since layer_data struct is already per-device, just moved vector of queues
directly into layer_data struct.

layers/draw_state.cpp
layers/draw_state.h

index 7a513d2..de2a164 100644 (file)
@@ -95,6 +95,7 @@ struct layer_data {
     VkLayerDispatchTable* device_dispatch_table;
     VkLayerInstanceDispatchTable* instance_dispatch_table;
     devExts device_extensions;
+    vector<VkQueue> queues; // all queues under given device
     unordered_set<VkCommandBuffer> inFlightCmdBuffers;
     // Layer specific data
     unordered_map<VkSampler,             unique_ptr<SAMPLER_NODE>>           sampleMap;
@@ -111,7 +112,6 @@ struct layer_data {
     unordered_map<VkDeviceMemory,        VkImage>                            memImageMap;
     unordered_map<VkFence,               FENCE_NODE>                         fenceMap;
     unordered_map<VkQueue,               QUEUE_NODE>                         queueMap;
-    unordered_map<VkDevice,              DEVICE_NODE>                        deviceMap;
     unordered_map<VkEvent,               EVENT_NODE>                         eventMap;
     unordered_map<QueryObject,           bool>                               queryToStateMap;
     unordered_map<VkSemaphore,           uint32_t>                           semaphoreSignaledMap;
@@ -3202,10 +3202,13 @@ void trackCommandBuffers(layer_data* my_data, VkQueue queue, uint32_t cmdBufferC
     }
     if (queue_data != my_data->queueMap.end()) {
         for (uint32_t i = 0; i < cmdBufferCount; ++i) {
+            // Add cmdBuffers to both the global set and queue set
             for (auto secondaryCmdBuffer : my_data->commandBufferMap[pCmdBuffers[i]]->secondaryCommandBuffers) {
                 my_data->inFlightCmdBuffers.insert(secondaryCmdBuffer);
+                my_data->queueMap[queue].inFlightCmdBuffers.insert(secondaryCmdBuffer);
             }
             my_data->inFlightCmdBuffers.insert(pCmdBuffers[i]);
+            my_data->queueMap[queue].inFlightCmdBuffers.insert(pCmdBuffers[i]);
         }
     }
 }
@@ -3288,6 +3291,20 @@ VkBool32 cleanInFlightCmdBuffer(layer_data* my_data, VkCommandBuffer cmdBuffer)
     }
     return skip_call;
 }
+// Remove given cmd_buffer from inFlight set for given queue
+// If cmd_buffer is not inFlight on any other queues, also remove it from global inFlight set
+static inline void removeInFlightCmdBuffer(layer_data* dev_data, VkQueue queue, VkCommandBuffer cmd_buffer)
+{
+    dev_data->queueMap[queue].inFlightCmdBuffers.erase(cmd_buffer);
+    // Pull it off of global list initially, but if we find it in any other queue list, add it back in
+    dev_data->inFlightCmdBuffers.erase(cmd_buffer);
+    for (auto q : dev_data->queues) {
+        if ((q != queue) && (dev_data->queueMap[q].inFlightCmdBuffers.find(cmd_buffer) != dev_data->queueMap[q].inFlightCmdBuffers.end())) {
+            dev_data->inFlightCmdBuffers.insert(cmd_buffer);
+            break;
+        }
+    }
+}
 
 VK_LAYER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout)
 {
@@ -3296,9 +3313,10 @@ VK_LAYER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences(VkDevice device,
     VkBool32 skip_call = VK_FALSE;
     if ((waitAll || fenceCount == 1) && result == VK_SUCCESS) {
         for (uint32_t i = 0; i < fenceCount; ++i) {
+            VkQueue fence_queue = dev_data->fenceMap[pFences[i]].queue;
             for (auto cmdBuffer : dev_data->fenceMap[pFences[i]].cmdBuffers) {
-                dev_data->inFlightCmdBuffers.erase(cmdBuffer);
                 skip_call |= cleanInFlightCmdBuffer(dev_data, cmdBuffer);
+                removeInFlightCmdBuffer(dev_data, fence_queue, cmdBuffer);
             }
         }
         decrementResources(dev_data, fenceCount, pFences);
@@ -3313,13 +3331,17 @@ VK_LAYER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL vkGetFenceStatus(VkDevice device,
 {
     layer_data* dev_data = get_my_data_ptr(get_dispatch_key(device), layer_data_map);
     VkResult result = dev_data->device_dispatch_table->GetFenceStatus(device, fence);
+    VkBool32 skip_call = VK_FALSE;
     if (result == VK_SUCCESS) {
+        auto fence_queue = dev_data->fenceMap[fence].queue;
         for (auto cmdBuffer : dev_data->fenceMap[fence].cmdBuffers) {
-            dev_data->inFlightCmdBuffers.erase(cmdBuffer);
-            cleanInFlightCmdBuffer(dev_data, cmdBuffer);
+            skip_call |= cleanInFlightCmdBuffer(dev_data, cmdBuffer);
+            removeInFlightCmdBuffer(dev_data, fence_queue, cmdBuffer);
         }
         decrementResources(dev_data, 1, &fence);
     }
+    if (VK_FALSE != skip_call)
+        return VK_ERROR_VALIDATION_FAILED_EXT;
     return result;
 }
 
@@ -3327,7 +3349,7 @@ VK_LAYER_EXPORT VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue(VkDevice device, uin
 {
     layer_data* dev_data = get_my_data_ptr(get_dispatch_key(device), layer_data_map);
     dev_data->device_dispatch_table->GetDeviceQueue(device, queueFamilyIndex, queueIndex, pQueue);
-    dev_data->deviceMap[device].queues.push_back(*pQueue);
+    dev_data->queues.push_back(*pQueue);
     dev_data->queueMap[*pQueue].device = device;
 }
 
@@ -3335,22 +3357,26 @@ VK_LAYER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle(VkQueue queue)
 {
     layer_data* dev_data = get_my_data_ptr(get_dispatch_key(queue), layer_data_map);
     decrementResources(dev_data, queue);
-    // TODO : Is this a bug? Can't clear all device CmdBuffers on QueueWaitIdle
-    for (auto cmdBuffer : dev_data->inFlightCmdBuffers) {
-        cleanInFlightCmdBuffer(dev_data, cmdBuffer);
+    VkBool32 skip_call = VK_FALSE;
+    // Iterate over local set since we erase set members as we go in for loop
+    auto local_cb_set = dev_data->queueMap[queue].inFlightCmdBuffers;
+    for (auto cmdBuffer : local_cb_set) {
+        skip_call |= cleanInFlightCmdBuffer(dev_data, cmdBuffer);
+        removeInFlightCmdBuffer(dev_data, queue, cmdBuffer);
     }
-    dev_data->inFlightCmdBuffers.clear();
+    dev_data->queueMap[queue].inFlightCmdBuffers.clear();
+    if (VK_FALSE != skip_call)
+        return VK_ERROR_VALIDATION_FAILED_EXT;
     return dev_data->device_dispatch_table->QueueWaitIdle(queue);
 }
 
 VK_LAYER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(VkDevice device)
 {
     layer_data* dev_data = get_my_data_ptr(get_dispatch_key(device), layer_data_map);
-    auto device_data = dev_data->deviceMap.find(device);
-    if (device_data != dev_data->deviceMap.end()) {
-        for (auto queue : device_data->second.queues) {
-            decrementResources(dev_data, queue);
-        }
+    for (auto queue : dev_data->queues) {
+        decrementResources(dev_data, queue);
+        // Clear all of the queue inFlightCmdBuffers (global set cleared below)
+        dev_data->queueMap[queue].inFlightCmdBuffers.clear();
     }
     for (auto cmdBuffer : dev_data->inFlightCmdBuffers) {
         cleanInFlightCmdBuffer(dev_data, cmdBuffer);
index 9f17bb9..45e8955 100755 (executable)
@@ -255,6 +255,7 @@ struct RENDER_PASS_NODE {
 class FENCE_NODE : public BASE_NODE {
   public:
     using BASE_NODE::in_use;
+    VkQueue queue;
     vector<VkCommandBuffer> cmdBuffers;
     bool needsSignaled;
     VkFence priorFence;
@@ -274,11 +275,6 @@ class QUEUE_NODE {
     unordered_set<VkCommandBuffer> inFlightCmdBuffers;
 };
 
-class DEVICE_NODE {
-  public:
-    vector<VkQueue> queues;
-};
-
 // Descriptor Data structures
 // Layout Node has the core layout data
 typedef struct _LAYOUT_NODE {