anv: move all dynamic state emission to cmd_buffer_flush_dynamic_state
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Sun, 30 Jul 2023 07:34:43 +0000 (10:34 +0300)
committerMarge Bot <emma+marge@anholt.net>
Wed, 6 Sep 2023 20:07:01 +0000 (20:07 +0000)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24536>

src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/gfx8_cmd_buffer.c

index dd8481b..2dc8f13 100644 (file)
@@ -33,8 +33,6 @@
 #include "genxml/gen_macros.h"
 #include "genxml/genX_pack.h"
 #include "genxml/genX_rt_pack.h"
-#include "common/intel_guardband.h"
-#include "compiler/brw_prim.h"
 #include "common/intel_genX_state.h"
 
 #include "ds/intel_tracepoints.h"
@@ -2873,277 +2871,6 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
 }
 #endif
 
-static void
-cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
-{
-   const struct vk_dynamic_graphics_state *dyn =
-      &cmd_buffer->vk.dynamic_graphics_state;
-
-   if (!(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) &&
-       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) &&
-       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) &&
-       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT) &&
-       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) &&
-       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
-      return;
-
-   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
-
-   anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
-                        pipeline->gfx8.clip, clip) {
-      /* Take dynamic primitive topology in to account with
-       *    3DSTATE_CLIP::ViewportXYClipTestEnable
-       */
-      const VkPolygonMode dynamic_raster_mode =
-         genX(raster_polygon_mode)(pipeline,
-                                   dyn->rs.polygon_mode,
-                                   dyn->ia.primitive_topology);
-      const bool xy_clip_test_enable =
-         (dynamic_raster_mode == VK_POLYGON_MODE_FILL);
-
-      clip.APIMode = dyn->vp.depth_clip_negative_one_to_one ?
-                     APIMODE_OGL : APIMODE_D3D;
-      clip.ViewportXYClipTestEnable = xy_clip_test_enable;
-
-      ANV_SETUP_PROVOKING_VERTEX(clip, dyn->rs.provoking_vertex);
-
-      /* TODO(mesh): Multiview. */
-      if (anv_pipeline_is_primitive(pipeline)) {
-         const struct brw_vue_prog_data *last =
-            anv_pipeline_get_last_vue_prog_data(pipeline);
-         if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
-            clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
-                                  dyn->vp.viewport_count - 1 : 0;
-         }
-      } else if (anv_pipeline_is_mesh(pipeline)) {
-         const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
-         if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
-            clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
-                                  dyn->vp.viewport_count - 1 : 0;
-         }
-      }
-   }
-}
-
-static void
-cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_instance *instance = cmd_buffer->device->physical->instance;
-   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
-   const struct vk_dynamic_graphics_state *dyn =
-      &cmd_buffer->vk.dynamic_graphics_state;
-   uint32_t count = dyn->vp.viewport_count;
-   const VkViewport *viewports = dyn->vp.viewports;
-   struct anv_state sf_clip_state =
-      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
-
-   const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;
-
-   for (uint32_t i = 0; i < count; i++) {
-      const VkViewport *vp = &viewports[i];
-
-      /* The gfx7 state struct has just the matrix and guardband fields, the
-       * gfx8 struct adds the min/max viewport fields. */
-      struct GENX(SF_CLIP_VIEWPORT) sfv = {
-         .ViewportMatrixElementm00 = vp->width / 2,
-         .ViewportMatrixElementm11 = vp->height / 2,
-         .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
-         .ViewportMatrixElementm30 = vp->x + vp->width / 2,
-         .ViewportMatrixElementm31 = vp->y + vp->height / 2,
-         .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
-            (vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
-         .XMinClipGuardband = -1.0f,
-         .XMaxClipGuardband = 1.0f,
-         .YMinClipGuardband = -1.0f,
-         .YMaxClipGuardband = 1.0f,
-         .XMinViewPort = vp->x,
-         .XMaxViewPort = vp->x + vp->width - 1,
-         .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
-         .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
-      };
-
-      /* Fix depth test misrenderings by lowering translated depth range */
-      if (instance->lower_depth_range_rate != 1.0f)
-         sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;
-
-      const uint32_t fb_size_max = 1 << 14;
-      uint32_t x_min = 0, x_max = fb_size_max;
-      uint32_t y_min = 0, y_max = fb_size_max;
-
-      /* If we have a valid renderArea, include that */
-      if (gfx->render_area.extent.width > 0 &&
-          gfx->render_area.extent.height > 0) {
-         x_min = MAX2(x_min, gfx->render_area.offset.x);
-         x_max = MIN2(x_max, gfx->render_area.offset.x +
-                             gfx->render_area.extent.width);
-         y_min = MAX2(y_min, gfx->render_area.offset.y);
-         y_max = MIN2(y_max, gfx->render_area.offset.y +
-                             gfx->render_area.extent.height);
-      }
-
-      /* The client is required to have enough scissors for whatever it sets
-       * as ViewportIndex but it's possible that they've got more viewports
-       * set from a previous command.  Also, from the Vulkan 1.3.207:
-       *
-       *    "The application must ensure (using scissor if necessary) that
-       *    all rendering is contained within the render area."
-       *
-       * If the client doesn't set a scissor, that basically means it
-       * guarantees everything is in-bounds already.  If we end up using a
-       * guardband of [-1, 1] in that case, there shouldn't be much loss.
-       * It's theoretically possible that they could do all their clipping
-       * with clip planes but that'd be a bit odd.
-       */
-      if (i < dyn->vp.scissor_count) {
-         const VkRect2D *scissor = &dyn->vp.scissors[i];
-         x_min = MAX2(x_min, scissor->offset.x);
-         x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
-         y_min = MAX2(y_min, scissor->offset.y);
-         y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
-      }
-
-      /* Only bother calculating the guardband if our known render area is
-       * less than the maximum size.  Otherwise, it will calculate [-1, 1]
-       * anyway but possibly with precision loss.
-       */
-      if (x_min > 0 || x_max < fb_size_max ||
-          y_min > 0 || y_max < fb_size_max) {
-         intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
-                                        sfv.ViewportMatrixElementm00,
-                                        sfv.ViewportMatrixElementm11,
-                                        sfv.ViewportMatrixElementm30,
-                                        sfv.ViewportMatrixElementm31,
-                                        &sfv.XMinClipGuardband,
-                                        &sfv.XMaxClipGuardband,
-                                        &sfv.YMinClipGuardband,
-                                        &sfv.YMaxClipGuardband);
-      }
-
-      GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
-   }
-
-   anv_batch_emit(&cmd_buffer->batch,
-                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
-      clip.SFClipViewportPointer = sf_clip_state.offset;
-   }
-}
-
-static void
-cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
-{
-   const struct vk_dynamic_graphics_state *dyn =
-      &cmd_buffer->vk.dynamic_graphics_state;
-   uint32_t count = dyn->vp.viewport_count;
-   const VkViewport *viewports = dyn->vp.viewports;
-   struct anv_state cc_state =
-      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
-
-   for (uint32_t i = 0; i < count; i++) {
-      const VkViewport *vp = &viewports[i];
-
-      /* From the Vulkan spec:
-       *
-       *    "It is valid for minDepth to be greater than or equal to
-       *    maxDepth."
-       */
-      float min_depth = MIN2(vp->minDepth, vp->maxDepth);
-      float max_depth = MAX2(vp->minDepth, vp->maxDepth);
-
-      struct GENX(CC_VIEWPORT) cc_viewport = {
-         .MinimumDepth = dyn->rs.depth_clamp_enable ? min_depth : 0.0f,
-         .MaximumDepth = dyn->rs.depth_clamp_enable ? max_depth : 1.0f,
-      };
-
-      GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
-   }
-
-   anv_batch_emit(&cmd_buffer->batch,
-                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
-      cc.CCViewportPointer = cc_state.offset;
-   }
-}
-
-static void
-cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
-   const struct vk_dynamic_graphics_state *dyn =
-      &cmd_buffer->vk.dynamic_graphics_state;
-   uint32_t count = dyn->vp.scissor_count;
-   const VkRect2D *scissors = dyn->vp.scissors;
-   const VkViewport *viewports = dyn->vp.viewports;
-
-   /* Wa_1409725701:
-    *    "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
-    *    stored as an array of up to 16 elements. The location of first
-    *    element of the array, as specified by Pointer to SCISSOR_RECT, should
-    *    be aligned to a 64-byte boundary.
-    */
-   uint32_t alignment = 64;
-   struct anv_state scissor_state =
-      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, alignment);
-
-   for (uint32_t i = 0; i < count; i++) {
-      const VkRect2D *s = &scissors[i];
-      const VkViewport *vp = &viewports[i];
-
-      /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
-       * ymax < ymin for empty clips.  In case clip x, y, width height are all
-       * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't
-       * what we want. Just special case empty clips and produce a canonical
-       * empty clip. */
-      static const struct GENX(SCISSOR_RECT) empty_scissor = {
-         .ScissorRectangleYMin = 1,
-         .ScissorRectangleXMin = 1,
-         .ScissorRectangleYMax = 0,
-         .ScissorRectangleXMax = 0
-      };
-
-      const int max = 0xffff;
-
-      uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
-      uint32_t x_min = MAX2(s->offset.x, vp->x);
-      int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
-                       MAX2(vp->y, vp->y + vp->height) - 1);
-      int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
-                       vp->x + vp->width - 1);
-
-      y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
-      x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
-
-      /* Do this math using int64_t so overflow gets clamped correctly. */
-      if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
-         y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
-         x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
-         y_max = CLAMP((uint64_t) y_max, 0,
-                       gfx->render_area.offset.y +
-                       gfx->render_area.extent.height - 1);
-         x_max = CLAMP((uint64_t) x_max, 0,
-                       gfx->render_area.offset.x +
-                       gfx->render_area.extent.width - 1);
-      }
-
-      const struct GENX(SCISSOR_RECT) scissor = {
-         .ScissorRectangleYMin = y_min,
-         .ScissorRectangleXMin = x_min,
-         .ScissorRectangleYMax = y_max,
-         .ScissorRectangleXMax = x_max
-      };
-
-      if (s->extent.width <= 0 || s->extent.height <= 0) {
-         GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8,
-                                 &empty_scissor);
-      } else {
-         GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
-      }
-   }
-
-   anv_batch_emit(&cmd_buffer->batch,
-                  GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
-      ssp.ScissorRectPointer = scissor_state.offset;
-   }
-}
-
 ALWAYS_INLINE void
 genX(batch_emit_pipe_control)(struct anv_batch *batch,
                               const struct intel_device_info *devinfo,
@@ -3260,91 +2987,6 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value)
 #endif
 }
 
-static void
-genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
-{
-#if GFX_VERx10 >= 120
-   /* Wa_16013994831 - Disable preemption during streamout, enable back
-    * again if XFB not used by the current pipeline.
-    *
-    * Although this workaround applies to Gfx12+, we already disable object
-    * level preemption for another reason in genX_state.c so we can skip this
-    * for Gfx12.
-    */
-   if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
-      return;
-
-   if (cmd_buffer->state.gfx.pipeline->uses_xfb) {
-      genX(cmd_buffer_set_preemption)(cmd_buffer, false);
-      return;
-   }
-
-   if (!cmd_buffer->state.gfx.object_preemption)
-      genX(cmd_buffer_set_preemption)(cmd_buffer, true);
-#endif
-}
-
-static void
-cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
-{
-   const struct vk_dynamic_graphics_state *dyn =
-      &cmd_buffer->vk.dynamic_graphics_state;
-   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
-
-   genX(streamout_prologue)(cmd_buffer);
-
-   anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
-                        pipeline->gfx8.streamout_state, so) {
-      so.RenderingDisable = dyn->rs.rasterizer_discard_enable;
-      so.RenderStreamSelect = dyn->rs.rasterization_stream;
-#if INTEL_NEEDS_WA_18022508906
-      /* Wa_18022508906 :
-       *
-       * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
-       *
-       * SOL_INT::Render_Enable =
-       *   (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
-       *   (
-       *     (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
-       *     !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
-       *     !3DSTATE_STREAMOUT::API_Render_Disable &&
-       *     (
-       *       3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
-       *       3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
-       *       3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
-       *       3DSTATE_PS_EXTRA::PS_Valid ||
-       *       3DSTATE_WM::Legacy Depth_Buffer_Clear ||
-       *       3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
-       *       3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
-       *     )
-       *   )
-       *
-       * If SOL_INT::Render_Enable is false, the SO stage will not forward any
-       * topologies down the pipeline. Which is not what we want for occlusion
-       * queries.
-       *
-       * Here we force rendering to get SOL_INT::Render_Enable when occlusion
-       * queries are active.
-       */
-      if (!so.RenderingDisable && cmd_buffer->state.gfx.n_occlusion_queries > 0)
-         so.ForceRendering = Force_on;
-#endif
-
-      switch (dyn->rs.provoking_vertex) {
-      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
-         so.ReorderMode = LEADING;
-         break;
-
-      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
-         so.ReorderMode = TRAILING;
-         break;
-
-      default:
-         unreachable("Invalid provoking vertex mode");
-      }
-   }
-}
-
 ALWAYS_INLINE static void
 genX(emit_hs)(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -3632,41 +3274,6 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
                                           dirty & VK_SHADER_STAGE_ALL_GRAPHICS);
    }
 
-   cmd_buffer_emit_clip(cmd_buffer);
-
-   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
-                                       ANV_CMD_DIRTY_XFB_ENABLE)) ||
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM) ||
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
-      cmd_buffer_emit_streamout(cmd_buffer);
-
-   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
-                                       ANV_CMD_DIRTY_RENDER_TARGETS)) ||
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) {
-      cmd_buffer_emit_viewport(cmd_buffer);
-      cmd_buffer_emit_depth_viewport(cmd_buffer);
-      cmd_buffer_emit_scissor(cmd_buffer);
-   }
-
-   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) {
-      uint32_t topology;
-      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
-         topology = _3DPRIM_PATCHLIST(dyn->ts.patch_control_points);
-      else
-         topology = genX(vk_to_intel_primitive_type)[dyn->ia.primitive_topology];
-
-      cmd_buffer->state.gfx.primitive_topology = topology;
-
-      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
-         vft.PrimitiveTopologyType = topology;
-      }
-   }
-
    if (any_dynamic_state_dirty || cmd_buffer->state.gfx.dirty)
       genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
 }
index 7de7f41..172c596 100644 (file)
@@ -31,6 +31,8 @@
 
 #include "genxml/gen_macros.h"
 #include "genxml/genX_pack.h"
+#include "common/intel_guardband.h"
+#include "compiler/brw_prim.h"
 
 void
 genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
@@ -488,6 +490,361 @@ genX(rasterization_mode)(VkPolygonMode raster_mode,
    }
 }
 
+static void
+cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
+{
+   const struct vk_dynamic_graphics_state *dyn =
+      &cmd_buffer->vk.dynamic_graphics_state;
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+
+   if (!(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) &&
+       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) &&
+       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) &&
+       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT) &&
+       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) &&
+       !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
+      return;
+
+   anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
+                        pipeline->gfx8.clip, clip) {
+      /* Take dynamic primitive topology in to account with
+       *    3DSTATE_CLIP::ViewportXYClipTestEnable
+       */
+      const VkPolygonMode dynamic_raster_mode =
+         genX(raster_polygon_mode)(pipeline,
+                                   dyn->rs.polygon_mode,
+                                   dyn->ia.primitive_topology);
+      const bool xy_clip_test_enable =
+         (dynamic_raster_mode == VK_POLYGON_MODE_FILL);
+
+      clip.APIMode = dyn->vp.depth_clip_negative_one_to_one ?
+                     APIMODE_OGL : APIMODE_D3D;
+      clip.ViewportXYClipTestEnable = xy_clip_test_enable;
+
+      ANV_SETUP_PROVOKING_VERTEX(clip, dyn->rs.provoking_vertex);
+
+      /* TODO(mesh): Multiview. */
+      if (anv_pipeline_is_primitive(pipeline)) {
+         const struct brw_vue_prog_data *last =
+            anv_pipeline_get_last_vue_prog_data(pipeline);
+         if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
+            clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
+                                  dyn->vp.viewport_count - 1 : 0;
+         }
+      } else if (anv_pipeline_is_mesh(pipeline)) {
+         const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
+         if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
+            clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
+                                  dyn->vp.viewport_count - 1 : 0;
+         }
+      }
+   }
+}
+
+static void
+genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
+{
+#if GFX_VERx10 >= 120
+   /* Wa_16013994831 - Disable preemption during streamout, enable back
+    * again if XFB not used by the current pipeline.
+    *
+    * Although this workaround applies to Gfx12+, we already disable object
+    * level preemption for another reason in genX_state.c so we can skip this
+    * for Gfx12.
+    */
+   if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
+      return;
+
+   if (cmd_buffer->state.gfx.pipeline->uses_xfb) {
+      genX(cmd_buffer_set_preemption)(cmd_buffer, false);
+      return;
+   }
+
+   if (!cmd_buffer->state.gfx.object_preemption)
+      genX(cmd_buffer_set_preemption)(cmd_buffer, true);
+#endif
+}
+
+static void
+cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
+{
+   const struct vk_dynamic_graphics_state *dyn =
+      &cmd_buffer->vk.dynamic_graphics_state;
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+
+   genX(streamout_prologue)(cmd_buffer);
+
+   anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
+                        pipeline->gfx8.streamout_state, so) {
+      so.RenderingDisable = dyn->rs.rasterizer_discard_enable;
+      so.RenderStreamSelect = dyn->rs.rasterization_stream;
+#if INTEL_NEEDS_WA_18022508906
+      /* Wa_18022508906 :
+       *
+       * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
+       *
+       * SOL_INT::Render_Enable =
+       *   (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
+       *   (
+       *     (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
+       *     !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
+       *     !3DSTATE_STREAMOUT::API_Render_Disable &&
+       *     (
+       *       3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
+       *       3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
+       *       3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
+       *       3DSTATE_PS_EXTRA::PS_Valid ||
+       *       3DSTATE_WM::Legacy Depth_Buffer_Clear ||
+       *       3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
+       *       3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
+       *     )
+       *   )
+       *
+       * If SOL_INT::Render_Enable is false, the SO stage will not forward any
+       * topologies down the pipeline. Which is not what we want for occlusion
+       * queries.
+       *
+       * Here we force rendering to get SOL_INT::Render_Enable when occlusion
+       * queries are active.
+       */
+      if (!so.RenderingDisable && cmd_buffer->state.gfx.n_occlusion_queries > 0)
+         so.ForceRendering = Force_on;
+#endif
+
+      switch (dyn->rs.provoking_vertex) {
+      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
+         so.ReorderMode = LEADING;
+         break;
+
+      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
+         so.ReorderMode = TRAILING;
+         break;
+
+      default:
+         unreachable("Invalid provoking vertex mode");
+      }
+   }
+}
+
+static void
+cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_instance *instance = cmd_buffer->device->physical->instance;
+   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
+   const struct vk_dynamic_graphics_state *dyn =
+      &cmd_buffer->vk.dynamic_graphics_state;
+   uint32_t count = dyn->vp.viewport_count;
+   const VkViewport *viewports = dyn->vp.viewports;
+   struct anv_state sf_clip_state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
+
+   const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;
+
+   for (uint32_t i = 0; i < count; i++) {
+      const VkViewport *vp = &viewports[i];
+
+      /* The gfx7 state struct has just the matrix and guardband fields, the
+       * gfx8 struct adds the min/max viewport fields. */
+      struct GENX(SF_CLIP_VIEWPORT) sfv = {
+         .ViewportMatrixElementm00 = vp->width / 2,
+         .ViewportMatrixElementm11 = vp->height / 2,
+         .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
+         .ViewportMatrixElementm30 = vp->x + vp->width / 2,
+         .ViewportMatrixElementm31 = vp->y + vp->height / 2,
+         .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
+            (vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
+         .XMinClipGuardband = -1.0f,
+         .XMaxClipGuardband = 1.0f,
+         .YMinClipGuardband = -1.0f,
+         .YMaxClipGuardband = 1.0f,
+         .XMinViewPort = vp->x,
+         .XMaxViewPort = vp->x + vp->width - 1,
+         .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
+         .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
+      };
+
+      /* Fix depth test misrenderings by lowering translated depth range */
+      if (instance->lower_depth_range_rate != 1.0f)
+         sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;
+
+      const uint32_t fb_size_max = 1 << 14;
+      uint32_t x_min = 0, x_max = fb_size_max;
+      uint32_t y_min = 0, y_max = fb_size_max;
+
+      /* If we have a valid renderArea, include that */
+      if (gfx->render_area.extent.width > 0 &&
+          gfx->render_area.extent.height > 0) {
+         x_min = MAX2(x_min, gfx->render_area.offset.x);
+         x_max = MIN2(x_max, gfx->render_area.offset.x +
+                             gfx->render_area.extent.width);
+         y_min = MAX2(y_min, gfx->render_area.offset.y);
+         y_max = MIN2(y_max, gfx->render_area.offset.y +
+                             gfx->render_area.extent.height);
+      }
+
+      /* The client is required to have enough scissors for whatever it sets
+       * as ViewportIndex but it's possible that they've got more viewports
+       * set from a previous command.  Also, from the Vulkan 1.3.207:
+       *
+       *    "The application must ensure (using scissor if necessary) that
+       *    all rendering is contained within the render area."
+       *
+       * If the client doesn't set a scissor, that basically means it
+       * guarantees everything is in-bounds already.  If we end up using a
+       * guardband of [-1, 1] in that case, there shouldn't be much loss.
+       * It's theoretically possible that they could do all their clipping
+       * with clip planes but that'd be a bit odd.
+       */
+      if (i < dyn->vp.scissor_count) {
+         const VkRect2D *scissor = &dyn->vp.scissors[i];
+         x_min = MAX2(x_min, scissor->offset.x);
+         x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
+         y_min = MAX2(y_min, scissor->offset.y);
+         y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
+      }
+
+      /* Only bother calculating the guardband if our known render area is
+       * less than the maximum size.  Otherwise, it will calculate [-1, 1]
+       * anyway but possibly with precision loss.
+       */
+      if (x_min > 0 || x_max < fb_size_max ||
+          y_min > 0 || y_max < fb_size_max) {
+         intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
+                                        sfv.ViewportMatrixElementm00,
+                                        sfv.ViewportMatrixElementm11,
+                                        sfv.ViewportMatrixElementm30,
+                                        sfv.ViewportMatrixElementm31,
+                                        &sfv.XMinClipGuardband,
+                                        &sfv.XMaxClipGuardband,
+                                        &sfv.YMinClipGuardband,
+                                        &sfv.YMaxClipGuardband);
+      }
+
+      GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
+   }
+
+   anv_batch_emit(&cmd_buffer->batch,
+                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
+      clip.SFClipViewportPointer = sf_clip_state.offset;
+   }
+}
+
+static void
+cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
+{
+   const struct vk_dynamic_graphics_state *dyn =
+      &cmd_buffer->vk.dynamic_graphics_state;
+   uint32_t count = dyn->vp.viewport_count;
+   const VkViewport *viewports = dyn->vp.viewports;
+   struct anv_state cc_state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
+
+   for (uint32_t i = 0; i < count; i++) {
+      const VkViewport *vp = &viewports[i];
+
+      /* From the Vulkan spec:
+       *
+       *    "It is valid for minDepth to be greater than or equal to
+       *    maxDepth."
+       */
+      float min_depth = MIN2(vp->minDepth, vp->maxDepth);
+      float max_depth = MAX2(vp->minDepth, vp->maxDepth);
+
+      struct GENX(CC_VIEWPORT) cc_viewport = {
+         .MinimumDepth = dyn->rs.depth_clamp_enable ? min_depth : 0.0f,
+         .MaximumDepth = dyn->rs.depth_clamp_enable ? max_depth : 1.0f,
+      };
+
+      GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
+   }
+
+   anv_batch_emit(&cmd_buffer->batch,
+                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
+      cc.CCViewportPointer = cc_state.offset;
+   }
+}
+
+static void
+cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
+   const struct vk_dynamic_graphics_state *dyn =
+      &cmd_buffer->vk.dynamic_graphics_state;
+   uint32_t count = dyn->vp.scissor_count;
+   const VkRect2D *scissors = dyn->vp.scissors;
+   const VkViewport *viewports = dyn->vp.viewports;
+
+   /* Wa_1409725701:
+    *    "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
+    *    stored as an array of up to 16 elements. The location of first
+    *    element of the array, as specified by Pointer to SCISSOR_RECT, should
+    *    be aligned to a 64-byte boundary.
+    */
+   uint32_t alignment = 64;
+   struct anv_state scissor_state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, alignment);
+
+   for (uint32_t i = 0; i < count; i++) {
+      const VkRect2D *s = &scissors[i];
+      const VkViewport *vp = &viewports[i];
+
+      /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
+       * ymax < ymin for empty clips.  In case clip x, y, width height are all
+       * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't
+       * what we want. Just special case empty clips and produce a canonical
+       * empty clip. */
+      static const struct GENX(SCISSOR_RECT) empty_scissor = {
+         .ScissorRectangleYMin = 1,
+         .ScissorRectangleXMin = 1,
+         .ScissorRectangleYMax = 0,
+         .ScissorRectangleXMax = 0
+      };
+
+      const int max = 0xffff;
+
+      uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
+      uint32_t x_min = MAX2(s->offset.x, vp->x);
+      int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
+                       MAX2(vp->y, vp->y + vp->height) - 1);
+      int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
+                       vp->x + vp->width - 1);
+
+      y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
+      x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
+
+      /* Do this math using int64_t so overflow gets clamped correctly. */
+      if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+         y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
+         x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
+         y_max = CLAMP((uint64_t) y_max, 0,
+                       gfx->render_area.offset.y +
+                       gfx->render_area.extent.height - 1);
+         x_max = CLAMP((uint64_t) x_max, 0,
+                       gfx->render_area.offset.x +
+                       gfx->render_area.extent.width - 1);
+      }
+
+      const struct GENX(SCISSOR_RECT) scissor = {
+         .ScissorRectangleYMin = y_min,
+         .ScissorRectangleXMin = x_min,
+         .ScissorRectangleYMax = y_max,
+         .ScissorRectangleXMax = x_max
+      };
+
+      if (s->extent.width <= 0 || s->extent.height <= 0) {
+         GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8,
+                                 &empty_scissor);
+      } else {
+         GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
+      }
+   }
+
+   anv_batch_emit(&cmd_buffer->batch,
+                  GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
+      ssp.ScissorRectPointer = scissor_state.offset;
+   }
+}
+
 void
 genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -495,6 +852,41 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
    struct vk_dynamic_graphics_state *dyn =
       &cmd_buffer->vk.dynamic_graphics_state;
 
+   cmd_buffer_emit_clip(cmd_buffer);
+
+   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
+                                       ANV_CMD_DIRTY_XFB_ENABLE)) ||
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM) ||
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
+      cmd_buffer_emit_streamout(cmd_buffer);
+
+   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
+                                       ANV_CMD_DIRTY_RENDER_TARGETS)) ||
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) {
+      cmd_buffer_emit_viewport(cmd_buffer);
+      cmd_buffer_emit_depth_viewport(cmd_buffer);
+      cmd_buffer_emit_scissor(cmd_buffer);
+   }
+
+   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) {
+      uint32_t topology;
+      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
+         topology = _3DPRIM_PATCHLIST(dyn->ts.patch_control_points);
+      else
+         topology = genX(vk_to_intel_primitive_type)[dyn->ia.primitive_topology];
+
+      cmd_buffer->state.gfx.primitive_topology = topology;
+
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
+         vft.PrimitiveTopologyType = topology;
+      }
+   }
+
    if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI)) {
       const uint32_t ve_count =