winsys/amdgpu: do not synchronize unsynchronized buffers
author    Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 7 Sep 2016 09:01:17 +0000 (11:01 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 27 Sep 2016 14:45:11 +0000 (16:45 +0200)
When a buffer is added to a CS without the SYNCHRONIZED usage flag, we no
longer add a dependency on the buffer's fence(s).

However, we still need to add a fence to the buffer during flush, so that
cache reclaim works correctly (and so that synchronization still happens in
the hypothetical case that the buffer is later added to a CS _with_ the
SYNCHRONIZED flag).

It is now possible that the submissions referring to a buffer are no longer
linearly ordered, so we may have to keep multiple fences around. We keep the
fences in a FIFO, which should usually stay quite short
(# of contexts * 2, for gfx + dma rings).
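
As a rough illustration of the FIFO semantics, here is a self-contained
sketch (made-up names, not the winsys code): signalled fences are released
from the front, and the remaining tail is compacted so that submission order
is preserved.

/* Minimal model of the per-buffer fence FIFO; all names are illustrative. */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

struct fence { bool signalled; };

struct bo_fences {
   unsigned num_fences;
   struct fence *fences[8];   /* FIFO: oldest submission first */
};

/* Drop the longest signalled prefix, mirroring the timeout == 0 path of
 * amdgpu_bo_wait; returns true if the buffer is now idle. */
static bool bo_fences_reap_idle(struct bo_fences *bo)
{
   unsigned idle = 0;

   while (idle < bo->num_fences && bo->fences[idle]->signalled)
      idle++;

   /* The real code also releases the fence references here. */
   memmove(&bo->fences[0], &bo->fences[idle],
           (bo->num_fences - idle) * sizeof(bo->fences[0]));
   bo->num_fences -= idle;

   return bo->num_fences == 0;
}

int main(void)
{
   struct fence a = { true }, b = { false };
   struct bo_fences bo = { 2, { &a, &b } };
   printf("idle: %d, remaining: %u\n", bo_fences_reap_idle(&bo), bo.num_fences);
   return 0;
}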

While we're at it, extract amdgpu_add_fence_dependency for a single buffer;
this will make it easier to distinguish between the real buffer and slab
cases later.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c

src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 0dbd0fb..37a7ba1 100644
@@ -73,43 +73,58 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
    }
 
    if (timeout == 0) {
+      unsigned idle_fences;
+      bool buffer_idle;
+
       pipe_mutex_lock(ws->bo_fence_lock);
-      if (bo->fence) {
-         if (amdgpu_fence_wait(bo->fence, 0, false)) {
-            /* Release the idle fence to avoid checking it again later. */
-            amdgpu_fence_reference(&bo->fence, NULL);
-         } else {
-            pipe_mutex_unlock(ws->bo_fence_lock);
-            return false;
-         }
+
+      for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
+         if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
+            break;
       }
+
+      /* Release the idle fences to avoid checking them again later. */
+      for (unsigned i = 0; i < idle_fences; ++i)
+         amdgpu_fence_reference(&bo->fences[i], NULL);
+
+      memmove(&bo->fences[0], &bo->fences[idle_fences],
+              (bo->num_fences - idle_fences) * sizeof(*bo->fences));
+      bo->num_fences -= idle_fences;
+
+      buffer_idle = !bo->num_fences;
       pipe_mutex_unlock(ws->bo_fence_lock);
-      return true;
 
+      return buffer_idle;
    } else {
-      struct pipe_fence_handle *fence = NULL;
-      bool fence_idle = false;
       bool buffer_idle = true;
 
-      /* Take a reference to the fences, so that we can wait for it
-       * without the lock. */
       pipe_mutex_lock(ws->bo_fence_lock);
-      amdgpu_fence_reference(&fence, bo->fence);
-      pipe_mutex_unlock(ws->bo_fence_lock);
+      while (bo->num_fences && buffer_idle) {
+         struct pipe_fence_handle *fence = NULL;
+         bool fence_idle = false;
 
-      /* Now wait for the fence. */
-      if (fence) {
+         amdgpu_fence_reference(&fence, bo->fences[0]);
+
+         /* Wait for the fence. */
+         pipe_mutex_unlock(ws->bo_fence_lock);
          if (amdgpu_fence_wait(fence, abs_timeout, true))
             fence_idle = true;
          else
             buffer_idle = false;
-      }
+         pipe_mutex_lock(ws->bo_fence_lock);
+
+         /* Release an idle fence to avoid checking it again later, keeping in
+          * mind that the fence array may have been modified by other threads.
+          */
+         if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
+            amdgpu_fence_reference(&bo->fences[0], NULL);
+            memmove(&bo->fences[0], &bo->fences[1],
+                    (bo->num_fences - 1) * sizeof(*bo->fences));
+            bo->num_fences--;
+         }
 
-      /* Release idle fences to avoid checking them again later. */
-      pipe_mutex_lock(ws->bo_fence_lock);
-      if (fence == bo->fence && fence_idle)
-         amdgpu_fence_reference(&bo->fence, NULL);
-      amdgpu_fence_reference(&fence, NULL);
+         amdgpu_fence_reference(&fence, NULL);
+      }
       pipe_mutex_unlock(ws->bo_fence_lock);
 
       return buffer_idle;
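
A reduced model of the locking pattern in the blocking path above: take a
reference to the head fence, drop the lock for the wait itself, then
re-validate the head after relocking, because other threads may have
modified the fence array in the meantime. pthread_mutex_t stands in for
pipe_mutex, the "wait" is a plain flag check, and all names are hypothetical.

#include <pthread.h>
#include <stdbool.h>
#include <string.h>

struct fence { int refcount; bool signalled; };

static void fence_reference(struct fence **dst, struct fence *src)
{
   if (src)
      src->refcount++;
   if (*dst && --(*dst)->refcount == 0) {
      /* last reference gone: a real implementation destroys the fence */
   }
   *dst = src;
}

struct bo {
   pthread_mutex_t lock;
   unsigned num_fences;
   struct fence *fences[8];   /* FIFO: oldest first */
};

static bool bo_wait_idle(struct bo *bo)
{
   bool buffer_idle = true;

   pthread_mutex_lock(&bo->lock);
   while (bo->num_fences && buffer_idle) {
      struct fence *fence = NULL;
      fence_reference(&fence, bo->fences[0]); /* keep alive across unlock */

      pthread_mutex_unlock(&bo->lock);
      buffer_idle = fence->signalled;         /* a real wait blocks here */
      pthread_mutex_lock(&bo->lock);

      /* Pop the head only if it is still the fence we waited on. */
      if (buffer_idle && bo->num_fences && bo->fences[0] == fence) {
         fence_reference(&bo->fences[0], NULL);
         memmove(&bo->fences[0], &bo->fences[1],
                 (bo->num_fences - 1) * sizeof(bo->fences[0]));
         bo->num_fences--;
      }
      fence_reference(&fence, NULL);
   }
   pthread_mutex_unlock(&bo->lock);

   return buffer_idle;
}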
@@ -122,6 +137,16 @@ static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
    return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
 }
 
+static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
+{
+   for (unsigned i = 0; i < bo->num_fences; ++i)
+      amdgpu_fence_reference(&bo->fences[i], NULL);
+
+   FREE(bo->fences);
+   bo->num_fences = 0;
+   bo->max_fences = 0;
+}
+
 void amdgpu_bo_destroy(struct pb_buffer *_buf)
 {
    struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
@@ -135,7 +160,7 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
    amdgpu_va_range_free(bo->va_handle);
    amdgpu_bo_free(bo->bo);
 
-   amdgpu_fence_reference(&bo->fence, NULL);
+   amdgpu_bo_remove_fences(bo);
 
    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
       bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 07403dd..93cc83a 100644
@@ -62,8 +62,10 @@ struct amdgpu_winsys_bo {
     */
    volatile int is_shared; /* bool (int for atomicity) */
 
-   /* Fence for buffer synchronization. */
-   struct pipe_fence_handle *fence;
+   /* Fences for buffer synchronization. */
+   unsigned num_fences;
+   unsigned max_fences;
+   struct pipe_fence_handle **fences;
 
    struct list_head global_list_item;
 };
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index e2f2974..d5ea705 100644
@@ -801,33 +801,33 @@ static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
 
 DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
 
-/* Since the kernel driver doesn't synchronize execution between different
- * rings automatically, we have to add fence dependencies manually.
- */
-static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs)
+static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs,
+                                        struct amdgpu_cs_buffer *buffer)
 {
    struct amdgpu_cs_context *cs = acs->csc;
-   int i;
+   struct amdgpu_winsys_bo *bo = buffer->bo;
+   struct amdgpu_cs_fence *dep;
+   unsigned new_num_fences = 0;
 
-   cs->request.number_of_dependencies = 0;
-
-   for (i = 0; i < cs->num_buffers; i++) {
-      struct amdgpu_cs_fence *dep;
+   for (unsigned j = 0; j < bo->num_fences; ++j) {
+      struct amdgpu_fence *bo_fence = (void *)bo->fences[j];
       unsigned idx;
 
-      struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence;
-      if (!bo_fence)
-         continue;
-
       if (bo_fence->ctx == acs->ctx &&
-          bo_fence->fence.ip_type == cs->request.ip_type &&
-          bo_fence->fence.ip_instance == cs->request.ip_instance &&
-          bo_fence->fence.ring == cs->request.ring)
+         bo_fence->fence.ip_type == cs->request.ip_type &&
+         bo_fence->fence.ip_instance == cs->request.ip_instance &&
+         bo_fence->fence.ring == cs->request.ring)
          continue;
 
       if (amdgpu_fence_wait((void *)bo_fence, 0, false))
          continue;
 
+      amdgpu_fence_reference(&bo->fences[new_num_fences], bo->fences[j]);
+      new_num_fences++;
+
+      if (!(buffer->usage & RADEON_USAGE_SYNCHRONIZED))
+         continue;
+
       if (bo_fence->submission_in_progress)
          os_wait_until_zero(&bo_fence->submission_in_progress,
                             PIPE_TIMEOUT_INFINITE);
@@ -844,6 +844,52 @@ static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs)
       dep = &cs->request.dependencies[idx];
       memcpy(dep, &bo_fence->fence, sizeof(*dep));
    }
+
+   for (unsigned j = new_num_fences; j < bo->num_fences; ++j)
+      amdgpu_fence_reference(&bo->fences[j], NULL);
+
+   bo->num_fences = new_num_fences;
+}
+
+/* Since the kernel driver doesn't synchronize execution between different
+ * rings automatically, we have to add fence dependencies manually.
+ */
+static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs)
+{
+   struct amdgpu_cs_context *cs = acs->csc;
+   int i;
+
+   cs->request.number_of_dependencies = 0;
+
+   for (i = 0; i < cs->num_buffers; i++)
+      amdgpu_add_fence_dependency(acs, &cs->buffers[i]);
+}
+
+static void amdgpu_add_fence(struct amdgpu_winsys_bo *bo,
+                             struct pipe_fence_handle *fence)
+{
+   if (bo->num_fences >= bo->max_fences) {
+      unsigned new_max_fences = MAX2(1, bo->max_fences * 2);
+      struct pipe_fence_handle **new_fences =
+         REALLOC(bo->fences,
+                 bo->num_fences * sizeof(*new_fences),
+                 new_max_fences * sizeof(*new_fences));
+      if (new_fences) {
+         bo->fences = new_fences;
+         bo->max_fences = new_max_fences;
+      } else {
+         fprintf(stderr, "amdgpu_add_fence: allocation failure, dropping fence\n");
+         if (!bo->num_fences)
+            return;
+
+         bo->num_fences--; /* prefer to keep a more recent fence if possible */
+         amdgpu_fence_reference(&bo->fences[bo->num_fences], NULL);
+      }
+   }
+
+   bo->fences[bo->num_fences] = NULL;
+   amdgpu_fence_reference(&bo->fences[bo->num_fences], fence);
+   bo->num_fences++;
 }
 
 void amdgpu_cs_submit_ib(void *job, int thread_index)
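
Two small idioms from this hunk, shown standalone. First,
amdgpu_add_fence_dependency filters bo->fences in place with a read index (j)
and a write index (new_num_fences); the generic shape of that idiom is
(sketch only, not driver code):

#include <stdbool.h>

/* In-place, order-preserving filter over a pointer array; the caller
 * releases arr[kept..num). Same shape as the loop in
 * amdgpu_add_fence_dependency above. */
static unsigned filter_in_place(void **arr, unsigned num,
                                bool (*keep)(void *))
{
   unsigned kept = 0;

   for (unsigned j = 0; j < num; ++j) {
      if (keep(arr[j]))
         arr[kept++] = arr[j];   /* safe: kept <= j */
   }
   return kept;
}

Second, amdgpu_add_fence grows the array by amortized doubling; a generic
sketch using plain realloc (gallium's REALLOC additionally takes the old
size):

#include <stdlib.h>
#include <stdbool.h>

/* Append with amortized-doubling growth; returns false on allocation
 * failure so the caller can decide which element to drop. */
static bool ptr_array_append(void ***arr, unsigned *num, unsigned *max,
                             void *elem)
{
   if (*num >= *max) {
      unsigned new_max = *max ? *max * 2 : 1;
      void **tmp = realloc(*arr, new_max * sizeof(void *));
      if (!tmp)
         return false;
      *arr = tmp;
      *max = new_max;
   }
   (*arr)[(*num)++] = elem;
   return true;
}

On allocation failure, the driver code above prefers to drop the previously
newest fence and keep the incoming one; presumably a newer fence signals
later, so waiting on it gives the stronger guarantee when some fence has to
be lost.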
@@ -1031,9 +1077,9 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
       pipe_mutex_lock(ws->bo_fence_lock);
       amdgpu_add_fence_dependencies(cs);
       for (i = 0; i < num_buffers; i++) {
-         p_atomic_inc(&cur->buffers[i].bo->num_active_ioctls);
-         amdgpu_fence_reference(&cur->buffers[i].bo->fence,
-                                cur->fence);
+         struct amdgpu_winsys_bo *bo = cur->buffers[i].bo;
+         p_atomic_inc(&bo->num_active_ioctls);
+         amdgpu_add_fence(bo, cur->fence);
       }
       pipe_mutex_unlock(ws->bo_fence_lock);
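
Note the ordering inside this critical section: dependencies are collected
from each buffer's existing fences first, and only then is the new CS fence
appended, so a submission never depends on its own fence. A condensed
outline, with hypothetical stand-ins for the real types and helpers:

/* Condensed outline of the flush-time critical section; hypothetical
 * stand-ins, not the driver code. */
struct buffer;
struct fence;

void lock(void);
void unlock(void);
/* Prunes signalled fences and records cross-ring dependencies for buffers
 * used with RADEON_USAGE_SYNCHRONIZED. */
void add_fence_dependencies(struct buffer *buffers, unsigned num);
void add_fence(struct buffer *buf, struct fence *f);

static void flush_fences(struct buffer *buffers, unsigned num,
                         struct fence *cs_fence)
{
   lock();
   add_fence_dependencies(buffers, num);

   /* Only now append the new submission's fence, so the dependency scan
    * above never saw it. */
   for (unsigned i = 0; i < num; i++)
      add_fence(&buffers[i], cs_fence);
   unlock();
}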