gallium/u_threaded: don't map big VRAM buffers for the first upload directly

author Marek Olšák <marek.olsak@amd.com>

Wed, 1 Nov 2017 23:00:53 +0000 (00:00 +0100)

committer Marek Olšák <marek.olsak@amd.com>

Mon, 6 Nov 2017 15:23:20 +0000 (16:23 +0100)
author Marek Olšák <marek.olsak@amd.com>
Wed, 1 Nov 2017 23:00:53 +0000 (00:00 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 6 Nov 2017 15:23:20 +0000 (16:23 +0100)
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c

index 0f23258..ccce12b 100644 (file)
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -1284,6 +1284,20 @@ tc_improve_map_buffer_flags(struct threaded_context *tc,
     if (usage & tc_flags)
        return usage;
  
+   /* Use the staging upload if it's preferred. */
+   if (usage & (PIPE_TRANSFER_DISCARD_RANGE |
+                PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
+       !(usage & PIPE_TRANSFER_PERSISTENT) &&
+       /* Try not to decrement the counter if it's not positive. Still racy,
+        * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
+       tres->max_forced_staging_uploads > 0 &&
+       p_atomic_dec_return(&tres->max_forced_staging_uploads) >= 0) {
+      usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+                 PIPE_TRANSFER_UNSYNCHRONIZED);
+
+      return usage | tc_flags | PIPE_TRANSFER_DISCARD_RANGE;
+   }
+
     /* Sparse buffers can't be mapped directly and can't be reallocated
      * (fully invalidated). That may just be a radeonsi limitation, but
      * the threaded context must obey it with radeonsi.
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h

index 8977b03..ac7bc3d 100644 (file)
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -241,6 +241,12 @@ struct threaded_resource {
      * pointers. */
     bool        is_shared;
     bool is_user_ptr;
+
+   /* If positive, prefer DISCARD_RANGE with a staging buffer over any other
+    * method of CPU access when map flags allow it. Useful for buffers that
+    * are too large for the visible VRAM window.
+    */
+   int max_forced_staging_uploads;
  };
  
  struct threaded_transfer {
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c

index 67daaa4..92521f4 100644 (file)
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -191,10 +191,15 @@ void si_init_resource_fields(struct r600_common_screen *rscreen,
         res->vram_usage = 0;
         res->gart_usage = 0;
  
-       if (res->domains & RADEON_DOMAIN_VRAM)
+       if (res->domains & RADEON_DOMAIN_VRAM) {
                 res->vram_usage = size;
-       else if (res->domains & RADEON_DOMAIN_GTT)
+
+               res->b.max_forced_staging_uploads =
+                       rscreen->info.has_dedicated_vram &&
+                       size >= rscreen->info.vram_vis_size / 4 ? 1 : 0;
+       } else if (res->domains & RADEON_DOMAIN_GTT) {
                 res->gart_usage = size;
+       }
  }
  
  bool si_alloc_resource(struct r600_common_screen *rscreen,
@@ -289,6 +294,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx,
         pb_reference(&rdst->buf, rsrc->buf);
         rdst->gpu_address = rsrc->gpu_address;
         rdst->b.b.bind = rsrc->b.b.bind;
+       rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads;
         rdst->flags = rsrc->flags;
  
         assert(rdst->vram_usage == rsrc->vram_usage);
author	Marek Olšák <marek.olsak@amd.com>
	Wed, 1 Nov 2017 23:00:53 +0000 (00:00 +0100)
committer	Marek Olšák <marek.olsak@amd.com>
	Mon, 6 Nov 2017 15:23:20 +0000 (16:23 +0100)
src/gallium/auxiliary/util/u_threaded_context.c		patch | blob | history
src/gallium/auxiliary/util/u_threaded_context.h		patch | blob | history
src/gallium/drivers/radeon/r600_buffer_common.c		patch | blob | history