swr: Add path to draw directly from client memory without copy.

author Bruce Cherniak <bruce.cherniak@intel.com>

Wed, 12 Jul 2017 20:04:47 +0000 (15:04 -0500)

committer Tim Rowley <timothy.o.rowley@intel.com>

Wed, 12 Jul 2017 21:56:40 +0000 (16:56 -0500)
author Bruce Cherniak <bruce.cherniak@intel.com>
Wed, 12 Jul 2017 20:04:47 +0000 (15:04 -0500)
committer Tim Rowley <timothy.o.rowley@intel.com>
Wed, 12 Jul 2017 21:56:40 +0000 (16:56 -0500)
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h

index 753cbf3..8bed78f 100644 (file)
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -51,6 +51,7 @@
  #define SWR_NEW_FRAMEBUFFER (1 << 15)
  #define SWR_NEW_CLIP (1 << 16)
  #define SWR_NEW_SO (1 << 17)
+#define SWR_LARGE_CLIENT_DRAW (1<<18) // Indicates client draw will block
  
  namespace std
  {
diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp

index eae4b57..62ad3f7 100644 (file)
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -188,6 +188,15 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                     info->instance_count,
                                     info->start,
                                     info->start_instance);
+
+   /* On a large client-buffer draw, the client buffer was used directly,
+    * without a copy.  Block until the draw is finished.
+    * VMD is an example application that benefits from this. */
+   if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
+      struct swr_screen *screen = swr_screen(pipe->screen);
+      swr_fence_submit(ctx, screen->flush_fence);
+      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
+   }
  }
  
  
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp

index 9352181..c8ff810 100644 (file)
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -61,6 +61,9 @@
  #define SWR_MAX_TEXTURE_CUBE_LEVELS 14  /* 8K x 8K for now */
  #define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
  
+/* Default max client_copy_limit */
+#define SWR_CLIENT_COPY_LIMIT 32768
+
  /* Flag indicates creation of alternate surface, to prevent recursive loop
   * in resource creation when msaa_force_enable is set. */
  #define SWR_RESOURCE_FLAG_ALT_SURFACE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
@@ -1067,6 +1070,16 @@ swr_destroy_screen(struct pipe_screen *p_screen)
  static void
  swr_validate_env_options(struct swr_screen *screen)
  {
+   /* The client_copy_limit bounds the amount of user-buffer memory copied
+    * to scratch space on a draw.  At or above this size, the draw accesses
+    * the user buffer directly and then blocks.  This is faster than queuing
+    * many large client draws. */
+   screen->client_copy_limit = SWR_CLIENT_COPY_LIMIT;
+   int client_copy_limit =
+      debug_get_num_option("SWR_CLIENT_COPY_LIMIT", SWR_CLIENT_COPY_LIMIT);
+   if (client_copy_limit > 0)
+      screen->client_copy_limit = client_copy_limit;
+
     /* XXX msaa under development, disable by default for now */
     screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
  
diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h

index a10f426..a11ea9f 100644 (file)
--- a/src/gallium/drivers/swr/swr_screen.h
+++ b/src/gallium/drivers/swr/swr_screen.h
@@ -43,8 +43,10 @@ struct swr_screen {
  
     struct sw_winsys *winsys;
  
+   /* Configurable environment settings */
     boolean msaa_force_enable;
     uint8_t msaa_max_count;
+   uint32_t client_copy_limit;
  
     HANDLE hJitMgr;
  
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp

index 4eef606..534f3c5 100644 (file)
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1267,12 +1267,20 @@ swr_update_derived(struct pipe_context *pipe,
              partial_inbounds = 0;
              min_vertex_index = info.min_index;
  
-            /* Copy only needed vertices to scratch space */
              size = AlignUp(size, 4);
-            const void *ptr = (const uint8_t *) vb->buffer.user + base;
-            ptr = (uint8_t *)swr_copy_to_scratch_space(
-               ctx, &ctx->scratch->vertex_buffer, ptr, size);
-            p_data = (const uint8_t *)ptr - base;
+            /* If size of client memory copy is too large, don't copy. The
+             * draw will access user-buffer directly and then block.  This is
+             * faster than queuing many large client draws. */
+            if (size >= screen->client_copy_limit) {
+               post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW;
+               p_data = (const uint8_t *) vb->buffer.user;
+            } else {
+               /* Copy only needed vertices to scratch space */
+               const void *ptr = (const uint8_t *) vb->buffer.user + base;
+               ptr = (uint8_t *)swr_copy_to_scratch_space(
+                     ctx, &ctx->scratch->vertex_buffer, ptr, size);
+               p_data = (const uint8_t *)ptr - base;
+            }
           }
  
           swrVertexBuffers[i] = {0};
@@ -1311,12 +1319,19 @@ swr_update_derived(struct pipe_context *pipe,
  
              size = info.count * pitch;
              size = AlignUp(size, 4);
-
-            /* Copy indices to scratch space */
-            const void *ptr = info.index.user;
-            ptr = swr_copy_to_scratch_space(
-               ctx, &ctx->scratch->index_buffer, ptr, size);
-            p_data = (const uint8_t *)ptr;
+            /* If size of client memory copy is too large, don't copy. The
+             * draw will access user-buffer directly and then block.  This is
+             * faster than queuing many large client draws. */
+            if (size >= screen->client_copy_limit) {
+               post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW;
+               p_data = (const uint8_t *) info.index.user;
+            } else {
+               /* Copy indices to scratch space */
+               const void *ptr = info.index.user;
+               ptr = swr_copy_to_scratch_space(
+                     ctx, &ctx->scratch->index_buffer, ptr, size);
+               p_data = (const uint8_t *)ptr;
+            }
           }
  
           SWR_INDEX_BUFFER_STATE swrIndexBuffer;
author	Bruce Cherniak <bruce.cherniak@intel.com>
	Wed, 12 Jul 2017 20:04:47 +0000 (15:04 -0500)
committer	Tim Rowley <timothy.o.rowley@intel.com>
	Wed, 12 Jul 2017 21:56:40 +0000 (16:56 -0500)
src/gallium/drivers/swr/swr_context.h		patch \| blob \| history
src/gallium/drivers/swr/swr_draw.cpp		patch \| blob \| history
src/gallium/drivers/swr/swr_screen.cpp		patch \| blob \| history
src/gallium/drivers/swr/swr_screen.h		patch \| blob \| history
src/gallium/drivers/swr/swr_state.cpp		patch \| blob \| history