anv: change anv_batch_emit_merge to also do packing
author    Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 6 Sep 2023 07:45:26 +0000 (10:45 +0300)
committer Marge Bot <emma+marge@anholt.net>
Wed, 6 Sep 2023 20:07:01 +0000 (20:07 +0000)
Instead of having that function only merge two sets of dwords, it can
also pack the new dynamic values. This saves us from declaring a bunch
of local structures and calling the packing functions ourselves.
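
As a sketch of the new calling pattern (field assignments taken from the
3DSTATE_CLIP hunk below), a caller now writes:

   anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
                        pipeline->gfx8.clip, clip) {
      /* Fill only the dynamic fields; the rest stays zero. */
      clip.APIMode = dyn->vp.depth_clip_negative_one_to_one ?
                     APIMODE_OGL : APIMODE_D3D;
      clip.ViewportXYClipTestEnable = xy_clip_test_enable;
   }

The macro declares the zero-initialized struct, reserves the dwords in
the batch, and at the end of the block packs the struct into a scratch
array and ORs it with the prepacked pipeline dwords.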

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24536>

src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/gfx8_cmd_buffer.c

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 3d8bb37..18849a6 100644
@@ -1603,18 +1603,17 @@ _anv_combine_address(struct anv_batch *batch, void *location,
       __dst;                                               \
    })
 
-#define anv_batch_emit_merge(batch, dwords0, dwords1)                   \
-   do {                                                                 \
-      uint32_t *dw;                                                     \
-                                                                        \
-      STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1));        \
-      dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
-      if (!dw)                                                          \
-         break;                                                         \
-      for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
-         dw[i] = (dwords0)[i] | (dwords1)[i];                           \
-      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
-   } while (0)
+#define anv_batch_emit_merge(batch, cmd, prepacked, name)               \
+   for (struct cmd name = { 0 },                                        \
+        *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
+        __builtin_expect(_dst != NULL, 1);                              \
+        ({ uint32_t _partial[__anv_cmd_length(cmd)];                    \
+           __anv_cmd_pack(cmd)(batch, _partial, &name);                 \
+           for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++)         \
+              ((uint32_t *)_dst)[i] = _partial[i] | (prepacked)[i];     \
+           VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
+           _dst = NULL;                                                 \
+         }))
 
 #define anv_batch_emit(batch, cmd, name)                            \
    for (struct cmd name = { __anv_cmd_header(cmd) },                    \
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index ff868ef..dd8481b 100644
@@ -2887,46 +2887,42 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
        !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
       return;
 
-   /* Take dynamic primitive topology in to account with
-    *    3DSTATE_CLIP::ViewportXYClipTestEnable
-    */
-   const VkPolygonMode dynamic_raster_mode =
-      genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
-                                dyn->rs.polygon_mode,
-                                dyn->ia.primitive_topology);
-   const bool xy_clip_test_enable =
-      (dynamic_raster_mode == VK_POLYGON_MODE_FILL);
-
-   struct GENX(3DSTATE_CLIP) clip = {
-      GENX(3DSTATE_CLIP_header),
-      .APIMode = dyn->vp.depth_clip_negative_one_to_one ? APIMODE_OGL : APIMODE_D3D,
-      .ViewportXYClipTestEnable = xy_clip_test_enable,
-   };
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
 
-   ANV_SETUP_PROVOKING_VERTEX(clip, dyn->rs.provoking_vertex);
+   anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
+                        pipeline->gfx8.clip, clip) {
+      /* Take dynamic primitive topology in to account with
+       *    3DSTATE_CLIP::ViewportXYClipTestEnable
+       */
+      const VkPolygonMode dynamic_raster_mode =
+         genX(raster_polygon_mode)(pipeline,
+                                   dyn->rs.polygon_mode,
+                                   dyn->ia.primitive_topology);
+      const bool xy_clip_test_enable =
+         (dynamic_raster_mode == VK_POLYGON_MODE_FILL);
 
-   uint32_t dwords[GENX(3DSTATE_CLIP_length)];
+      clip.APIMode = dyn->vp.depth_clip_negative_one_to_one ?
+                     APIMODE_OGL : APIMODE_D3D;
+      clip.ViewportXYClipTestEnable = xy_clip_test_enable;
 
-   /* TODO(mesh): Multiview. */
-   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
-   if (anv_pipeline_is_primitive(pipeline)) {
-      const struct brw_vue_prog_data *last =
-         anv_pipeline_get_last_vue_prog_data(pipeline);
-      if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
-         clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
-                               dyn->vp.viewport_count - 1 : 0;
-      }
-   } else if (anv_pipeline_is_mesh(pipeline)) {
-      const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
-      if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
-         clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
-                               dyn->vp.viewport_count - 1 : 0;
+      ANV_SETUP_PROVOKING_VERTEX(clip, dyn->rs.provoking_vertex);
+
+      /* TODO(mesh): Multiview. */
+      if (anv_pipeline_is_primitive(pipeline)) {
+         const struct brw_vue_prog_data *last =
+            anv_pipeline_get_last_vue_prog_data(pipeline);
+         if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
+            clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
+                                  dyn->vp.viewport_count - 1 : 0;
+         }
+      } else if (anv_pipeline_is_mesh(pipeline)) {
+         const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
+         if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
+            clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
+                                  dyn->vp.viewport_count - 1 : 0;
+         }
       }
    }
-
-   GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
-   anv_batch_emit_merge(&cmd_buffer->batch, dwords,
-                        pipeline->gfx8.clip);
 }
 
 static void
@@ -3295,64 +3291,58 @@ cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
       &cmd_buffer->vk.dynamic_graphics_state;
    struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
 
-   uint32_t dwords[GENX(3DSTATE_STREAMOUT_length)];
-
-   struct GENX(3DSTATE_STREAMOUT) so = {
-      GENX(3DSTATE_STREAMOUT_header),
-      .RenderingDisable = dyn->rs.rasterizer_discard_enable,
-      .RenderStreamSelect = dyn->rs.rasterization_stream,
-   };
+   genX(streamout_prologue)(cmd_buffer);
 
+   anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
+                        pipeline->gfx8.streamout_state, so) {
+      so.RenderingDisable = dyn->rs.rasterizer_discard_enable;
+      so.RenderStreamSelect = dyn->rs.rasterization_stream;
 #if INTEL_NEEDS_WA_18022508906
-   /* Wa_18022508906 :
-    *
-    * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
-    *
-    * SOL_INT::Render_Enable =
-    *   (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
-    *   (
-    *     (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
-    *     !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
-    *     !3DSTATE_STREAMOUT::API_Render_Disable &&
-    *     (
-    *       3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
-    *       3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
-    *       3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
-    *       3DSTATE_PS_EXTRA::PS_Valid ||
-    *       3DSTATE_WM::Legacy Depth_Buffer_Clear ||
-    *       3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
-    *       3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
-    *     )
-    *   )
-    *
-    * If SOL_INT::Render_Enable is false, the SO stage will not forward any
-    * topologies down the pipeline. Which is not what we want for occlusion
-    * queries.
-    *
-    * Here we force rendering to get SOL_INT::Render_Enable when occlusion
-    * queries are active.
-    */
-   if (!so.RenderingDisable && cmd_buffer->state.gfx.n_occlusion_queries > 0)
-      so.ForceRendering = Force_on;
+      /* Wa_18022508906 :
+       *
+       * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
+       *
+       * SOL_INT::Render_Enable =
+       *   (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
+       *   (
+       *     (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
+       *     !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
+       *     !3DSTATE_STREAMOUT::API_Render_Disable &&
+       *     (
+       *       3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
+       *       3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
+       *       3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
+       *       3DSTATE_PS_EXTRA::PS_Valid ||
+       *       3DSTATE_WM::Legacy Depth_Buffer_Clear ||
+       *       3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
+       *       3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
+       *     )
+       *   )
+       *
+       * If SOL_INT::Render_Enable is false, the SO stage will not forward any
+       * topologies down the pipeline. Which is not what we want for occlusion
+       * queries.
+       *
+       * Here we force rendering to get SOL_INT::Render_Enable when occlusion
+       * queries are active.
+       */
+      if (!so.RenderingDisable && cmd_buffer->state.gfx.n_occlusion_queries > 0)
+         so.ForceRendering = Force_on;
 #endif
 
-   switch (dyn->rs.provoking_vertex) {
-   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
-      so.ReorderMode = LEADING;
-      break;
+      switch (dyn->rs.provoking_vertex) {
+      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
+         so.ReorderMode = LEADING;
+         break;
 
-   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
-      so.ReorderMode = TRAILING;
-      break;
+      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
+         so.ReorderMode = TRAILING;
+         break;
 
-   default:
-      unreachable("Invalid provoking vertex mode");
+      default:
+         unreachable("Invalid provoking vertex mode");
+      }
    }
-
-   genX(streamout_prologue)(cmd_buffer);
-
-   GENX(3DSTATE_STREAMOUT_pack)(NULL, dwords, &so);
-   anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.streamout_state);
 }
 
 ALWAYS_INLINE static void
diff --git a/src/intel/vulkan/gfx8_cmd_buffer.c b/src/intel/vulkan/gfx8_cmd_buffer.c
index 89e698d..7de7f41 100644
@@ -289,29 +289,23 @@ genX(emit_gs)(struct anv_cmd_buffer *cmd_buffer)
       return;
    }
 
-   uint32_t dwords[GENX(3DSTATE_GS_length)];
    const struct vk_dynamic_graphics_state *dyn =
       &cmd_buffer->vk.dynamic_graphics_state;
+   anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_GS),
+                        pipeline->gfx8.gs, gs) {
+      switch (dyn->rs.provoking_vertex) {
+      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
+         gs.ReorderMode = LEADING;
+         break;
 
-   struct GENX(3DSTATE_GS) gs = {
-      GENX(3DSTATE_GS_header),
-   };
-
-   switch (dyn->rs.provoking_vertex) {
-   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
-      gs.ReorderMode = LEADING;
-      break;
-
-   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
-      gs.ReorderMode = TRAILING;
-      break;
+      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
+         gs.ReorderMode = TRAILING;
+         break;
 
-   default:
-      unreachable("Invalid provoking vertex mode");
+      default:
+         unreachable("Invalid provoking vertex mode");
+      }
    }
-
-   GENX(3DSTATE_GS_pack)(NULL, dwords, &gs);
-   anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.gs);
 }
 
 static void
@@ -552,40 +546,35 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH) ||
        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) ||
        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
-      uint32_t sf_dw[GENX(3DSTATE_SF_length)];
-      struct GENX(3DSTATE_SF) sf = {
-         GENX(3DSTATE_SF_header),
-      };
-
-      ANV_SETUP_PROVOKING_VERTEX(sf, dyn->rs.provoking_vertex);
+      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF),
+                           pipeline->gfx8.sf, sf) {
+         ANV_SETUP_PROVOKING_VERTEX(sf, dyn->rs.provoking_vertex);
 
-      sf.LineWidth = dyn->rs.line.width;
+         sf.LineWidth = dyn->rs.line.width;
 
-      /**
-       * From the Vulkan Spec:
-       *
-       *    "VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT specifies that the depth
-       *     bias representation is a factor of constant r equal to 1."
-       *
-       * From the SKL PRMs, Volume 7: 3D-Media-GPGPU, Depth Offset:
-       *
-       *    "When UNORM Depth Buffer is at Output Merger (or no Depth Buffer):
-       *
-       *     Bias = GlobalDepthOffsetConstant * r + GlobalDepthOffsetScale * MaxDepthSlope
-       *
-       *     Where r is the minimum representable value > 0 in the depth
-       *     buffer format, converted to float32 (note: If state bit Legacy
-       *     Global Depth Bias Enable is set, the r term will be forced to
-       *     1.0)"
-       *
-       * When VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT is set, enable
-       * LegacyGlobalDepthBiasEnable.
-       */
-      sf.LegacyGlobalDepthBiasEnable =
-         dyn->rs.depth_bias.representation == VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT;
-
-      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
-      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
+         /**
+          * From the Vulkan Spec:
+          *
+          *    "VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT specifies that the depth
+          *     bias representation is a factor of constant r equal to 1."
+          *
+          * From the SKL PRMs, Volume 7: 3D-Media-GPGPU, Depth Offset:
+          *
+          *    "When UNORM Depth Buffer is at Output Merger (or no Depth Buffer):
+          *
+          *     Bias = GlobalDepthOffsetConstant * r + GlobalDepthOffsetScale * MaxDepthSlope
+          *
+          *     Where r is the minimum representable value > 0 in the depth
+          *     buffer format, converted to float32 (note: If state bit Legacy
+          *     Global Depth Bias Enable is set, the r term will be forced to
+          *     1.0)"
+          *
+          * When VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT is set, enable
+          * LegacyGlobalDepthBiasEnable.
+          */
+         sf.LegacyGlobalDepthBiasEnable =
+            dyn->rs.depth_bias.representation == VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT;
+      }
    }
 
    if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
@@ -639,30 +628,31 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
       bool depth_clip_enable =
          vk_rasterization_state_depth_clip_enable(&dyn->rs);
 
-      uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
-      struct GENX(3DSTATE_RASTER) raster = {
-         GENX(3DSTATE_RASTER_header),
-         .APIMode = api_mode,
-         .DXMultisampleRasterizationEnable = msaa_raster_enable,
-         .AntialiasingEnable = aa_enable,
-         .CullMode     = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
-         .FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
-         .GlobalDepthOffsetEnableSolid       = dyn->rs.depth_bias.enable,
-         .GlobalDepthOffsetEnableWireframe   = dyn->rs.depth_bias.enable,
-         .GlobalDepthOffsetEnablePoint       = dyn->rs.depth_bias.enable,
-         .GlobalDepthOffsetConstant          = dyn->rs.depth_bias.constant,
-         .GlobalDepthOffsetScale             = dyn->rs.depth_bias.slope,
-         .GlobalDepthOffsetClamp             = dyn->rs.depth_bias.clamp,
-         .FrontFaceFillMode = genX(vk_to_intel_fillmode)[dyn->rs.polygon_mode],
-         .BackFaceFillMode = genX(vk_to_intel_fillmode)[dyn->rs.polygon_mode],
-         .ViewportZFarClipTestEnable = depth_clip_enable,
-         .ViewportZNearClipTestEnable = depth_clip_enable,
-         .ConservativeRasterizationEnable = dyn->rs.conservative_mode !=
-                                            VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT,
-      };
-      GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
-      anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
-                           pipeline->gfx8.raster);
+      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_RASTER),
+                           pipeline->gfx8.raster, raster) {
+         raster.APIMode = api_mode;
+         raster.DXMultisampleRasterizationEnable   = msaa_raster_enable;
+         raster.AntialiasingEnable                 = aa_enable;
+         raster.CullMode                           = genX(vk_to_intel_cullmode)[
+                                                        dyn->rs.cull_mode];
+         raster.FrontWinding                       = genX(vk_to_intel_front_face)[
+                                                        dyn->rs.front_face];
+         raster.GlobalDepthOffsetEnableSolid       = dyn->rs.depth_bias.enable;
+         raster.GlobalDepthOffsetEnableWireframe   = dyn->rs.depth_bias.enable;
+         raster.GlobalDepthOffsetEnablePoint       = dyn->rs.depth_bias.enable;
+         raster.GlobalDepthOffsetConstant          = dyn->rs.depth_bias.constant;
+         raster.GlobalDepthOffsetScale             = dyn->rs.depth_bias.slope;
+         raster.GlobalDepthOffsetClamp             = dyn->rs.depth_bias.clamp;
+         raster.FrontFaceFillMode                  = genX(vk_to_intel_fillmode)[
+                                                        dyn->rs.polygon_mode];
+         raster.BackFaceFillMode                   = genX(vk_to_intel_fillmode)[
+                                                        dyn->rs.polygon_mode];
+         raster.ViewportZFarClipTestEnable         = depth_clip_enable;
+         raster.ViewportZNearClipTestEnable        = depth_clip_enable;
+         raster.ConservativeRasterizationEnable    =
+            dyn->rs.conservative_mode !=
+            VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
+      }
    }
 
    if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE) ||
@@ -822,19 +812,14 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
       /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders
        * threads.
        */
-      uint32_t wm_dwords[GENX(3DSTATE_WM_length)];
-      struct GENX(3DSTATE_WM) wm = {
-         GENX(3DSTATE_WM_header),
-
-         .ForceThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
-                                      (pipeline->force_fragment_thread_dispatch ||
-                                       anv_cmd_buffer_all_color_write_masked(cmd_buffer)) ?
-                                      ForceON : 0,
-         .LineStippleEnable = dyn->rs.line.stipple.enable,
+      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
+                           pipeline->gfx8.wm, wm) {
+         wm.ForceThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
+                                        (pipeline->force_fragment_thread_dispatch ||
+                                        anv_cmd_buffer_all_color_write_masked(cmd_buffer)) ?
+                                        ForceON : 0;
+         wm.LineStippleEnable = dyn->rs.line.stipple.enable;
       };
-      GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm);
-
-      anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm);
    }
 
    if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||