tu: Fix data race in userspace VMA management.
authorEmma Anholt <emma@anholt.net>
Tue, 25 Jul 2023 18:48:40 +0000 (11:48 -0700)
committerEmma Anholt <emma@anholt.net>
Tue, 25 Jul 2023 20:34:25 +0000 (13:34 -0700)
The sequence was two threads A and B on a shared VkDevice:

A: move a BO to zombie VMA list
A: drop the BO VMA lock
B: prepare to allocate a BO
B:   Lock BO VMA lock
B:     call tu_free_zombie_vma_locked()
B:       close the gem handle from the VMA list
B:   Drop BO VMA lock
B: allocate a BO, getting the recently-closed handle back.
B: initialize the BO struct for the new handle.
A: memset the BO struct to 0.

Multithreading in C is the worst.

Closes: #9049, #9247
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24324>

src/freedreno/ci/freedreno-a618-fails.txt
src/freedreno/ci/freedreno-a618-flakes.txt
src/freedreno/ci/freedreno-a630-fails.txt
src/freedreno/ci/freedreno-a630-flakes.txt
src/freedreno/vulkan/tu_knl_drm_msm.cc

index a5f0943..4d1c960 100644 (file)
@@ -378,5 +378,3 @@ dEQP-VK.api.driver_properties.conformance_version,Fail
 # After switch from 6.3.1 to 6.3.13
 dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail
 gmem-dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.event,Crash
-gmem-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set,Crash
index cc4df5d..0214f61 100644 (file)
@@ -213,15 +213,3 @@ spec@!opengl 3.2@gl-3.2-adj-prims cull-front pv-last
 spec@!opengl 3.2@gl-3.2-adj-prims line cull-back pv-last
 spec@!opengl 3.2@gl-3.2-adj-prims line cull-front pv-last
 spec@!opengl 3.2@gl-3.2-adj-prims pv-last
-
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/9049
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.device_memory_small
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.event
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.query_pool
-dEQP-VK.api.object_management.multithreaded_shared_resources.descriptor_pool
-dEQP-VK.api.object_management.multithreaded_shared_resources.device_memory_small
-dEQP-VK.api.object_management.multithreaded_shared_resources.event
-dEQP-VK.api.object_management.multithreaded_shared_resources.query_pool
-gmem-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set
index 17a6a82..4587bd7 100644 (file)
@@ -383,13 +383,5 @@ dynamic-dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_2.d32_s
 # since Debian 12 (bookworm) uprev
 dEQP-VK.api.driver_properties.conformance_version,Fail
 
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/9049
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set,Crash
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool,Crash
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.event,Crash
-dEQP-VK.api.object_management.multithreaded_shared_resources.descriptor_pool,Crash
-dEQP-VK.api.object_management.multithreaded_shared_resources.event,Crash
-dEQP-VK.api.object_management.multithreaded_shared_resources.descriptor_pool_free_descriptor_set,Crash
-dEQP-VK.api.object_management.multithreaded_shared_resources.query_pool,Crash
 dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail
 gmem-dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail
index 680162b..9adb73f 100644 (file)
@@ -215,16 +215,3 @@ KHR-GL46.buffer_storage.map_persistent_flush
 
 # recently started flaking towards to UnexpectedPass
 spec@ext_external_objects@vk-depth-display@D24S8
-
-# Inconsistently crashing.
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/9049
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.device_memory_small
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.event
-dEQP-VK.api.object_management.multithreaded_per_thread_resources.query_pool
-dEQP-VK.api.object_management.multithreaded_shared_resources.descriptor_pool
-dEQP-VK.api.object_management.multithreaded_shared_resources.device_memory_small
-dEQP-VK.api.object_management.multithreaded_shared_resources.event
-dEQP-VK.api.object_management.multithreaded_shared_resources.query_pool
-gmem-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set
index d093f57..0bc472f 100644 (file)
@@ -733,9 +733,13 @@ msm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
       vma->iova = bo->iova;
       vma->size = bo->size;
       vma->fence = p_atomic_read(&dev->queues[0]->fence);
-      mtx_unlock(&dev->vma_mutex);
 
+      /* Must be cleared under the VMA mutex, or another thread could race to
+       * reap the VMA, closing the BO and letting a new GEM allocation produce
+       * this handle again.
+       */
       memset(bo, 0, sizeof(*bo));
+      mtx_unlock(&dev->vma_mutex);
    } else {
       /* Our BO structs are stored in a sparse array in the physical device,
        * so we don't want to free the BO pointer, instead we want to reset it