anv: fix VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT state
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Mon, 11 Oct 2021 15:31:26 +0000 (18:31 +0300)
committerMarge Bot <emma+marge@anholt.net>
Thu, 24 Mar 2022 10:49:07 +0000 (10:49 +0000)
First, there is a problem if you do the following

 vkCmdSetColorWriteEnableEXT(attachmentCount = 8)
 vkCmdBindPipeline(GFX, with attachmentCount = 4)
 vkCmdDraw()
 vkCmdBindPipeline(GFX, with attachmentCount = 8)
 vkCmdDraw()

Because in the dynamic state emission code we rely on the first
pipeline to figure the number of BLEND_STATE entries to prepare. This
is wrong, we should fill all entries so that the dynamic state works
regardless of the number of attachments in the pipeline. With regard
to the dynamic values, we should retain enable/disable values that do
not concern the current pipeline.

Second, 3DSTATE_WM was not always reemitted when the pipeline changed.
But since it is not emitted as part of the pipeline, this results in
inconsistent state being programmed.

Third, we end up disabling the fragment stage completely in some
cases. And that is programming the pipeline inconsistently and
triggering a hang on TGL.

v2: Fix comment (Tapani)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: b15bfe92f7f8 ("anv: implement VK_EXT_color_write_enable")
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15310>

src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/genX_pipeline.c
src/intel/vulkan/gfx7_cmd_buffer.c
src/intel/vulkan/gfx8_cmd_buffer.c

index bc33816..fc0e10c 100644 (file)
@@ -1600,9 +1600,16 @@ void anv_CmdSetColorWriteEnableEXT(
 
    assert(attachmentCount <= MAX_RTS);
 
-   uint8_t color_writes = 0;
-   for (uint32_t i = 0; i < attachmentCount; i++)
-      color_writes |= pColorWriteEnables[i] ? (1 << i) : 0;
+   /* Keep the existing values outside the color attachments count of the
+    * current pipeline.
+    */
+   uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
+   for (uint32_t i = 0; i < attachmentCount; i++) {
+      if (pColorWriteEnables[i])
+         color_writes |= BITFIELD_BIT(i);
+      else
+         color_writes &= ~BITFIELD_BIT(i);
+   }
 
    if (cmd_buffer->state.gfx.dynamic.color_writes != color_writes) {
       cmd_buffer->state.gfx.dynamic.color_writes = color_writes;
index 52b6430..e7db921 100644 (file)
@@ -487,12 +487,9 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
    key->ignore_sample_mask_out = false;
 
    assert(rendering_info->colorAttachmentCount <= MAX_RTS);
-   for (uint32_t i = 0; i < rendering_info->colorAttachmentCount; i++) {
-      if (rendering_info->pColorAttachmentFormats[i] != VK_FORMAT_UNDEFINED)
-         key->color_outputs_valid |= (1 << i);
-   }
-
-   key->nr_color_regions = rendering_info->colorAttachmentCount;
+   /* Consider all inputs as valid until look at the NIR variables. */
+   key->color_outputs_valid = (1u << MAX_RTS) - 1;
+   key->nr_color_regions = MAX_RTS;
 
    /* To reduce possible shader recompilations we would need to know if
     * there is a SampleMask output variable to compute if we should emit
@@ -1119,6 +1116,26 @@ static void
 anv_pipeline_link_fs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *stage)
 {
+   /* Initially the valid outputs value is set to all possible render targets
+    * valid (see populate_wm_prog_key()), before we look at the shader
+    * variables. Here we look at the output variables of the shader an compute
+    * a correct number of render target outputs.
+    */
+   stage->key.wm.color_outputs_valid = 0;
+   nir_foreach_shader_out_variable_safe(var, stage->nir) {
+      if (var->data.location < FRAG_RESULT_DATA0)
+         continue;
+
+      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
+      const unsigned array_len =
+         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
+      assert(rt + array_len <= MAX_RTS);
+
+      stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
+   }
+   stage->key.wm.nr_color_regions =
+      util_last_bit(stage->key.wm.color_outputs_valid);
+
    unsigned num_rt_bindings;
    struct anv_pipeline_binding rt_bindings[MAX_RTS];
    if (stage->key.wm.nr_color_regions > 0) {
@@ -1152,71 +1169,6 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
    typed_memcpy(stage->bind_map.surface_to_descriptor,
                 rt_bindings, num_rt_bindings);
    stage->bind_map.surface_count += num_rt_bindings;
-
-   /* Now that we've set up the color attachments, we can go through and
-    * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
-    * hopes that dead code can clean them up in this and any earlier shader
-    * stages.
-    */
-   nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
-   bool deleted_output = false;
-   nir_foreach_shader_out_variable_safe(var, stage->nir) {
-      /* TODO: We don't delete depth/stencil writes.  We probably could if the
-       * subpass doesn't have a depth/stencil attachment.
-       */
-      if (var->data.location < FRAG_RESULT_DATA0)
-         continue;
-
-      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
-
-      /* If this is the RT at location 0 and we have alpha to coverage
-       * enabled we still need that write because it will affect the coverage
-       * mask even if it's never written to a color target.
-       */
-      if (rt == 0 && stage->key.wm.alpha_to_coverage)
-         continue;
-
-      const unsigned array_len =
-         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
-      assert(rt + array_len <= MAX_RTS);
-
-      if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
-                             BITFIELD_RANGE(rt, array_len))) {
-         deleted_output = true;
-         var->data.mode = nir_var_function_temp;
-         exec_node_remove(&var->node);
-         exec_list_push_tail(&impl->locals, &var->node);
-      }
-   }
-
-   if (deleted_output)
-      nir_fixup_deref_modes(stage->nir);
-
-   /* Initially the valid outputs value is based off the renderpass color
-    * attachments (see populate_wm_prog_key()), now that we've potentially
-    * deleted variables that map to unused attachments, we need to update the
-    * valid outputs for the backend compiler based on what output variables
-    * are actually used. */
-   stage->key.wm.color_outputs_valid = 0;
-   nir_foreach_shader_out_variable_safe(var, stage->nir) {
-      if (var->data.location < FRAG_RESULT_DATA0)
-         continue;
-
-      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
-      const unsigned array_len =
-         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
-      assert(rt + array_len <= MAX_RTS);
-
-      stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
-   }
-
-   /* We stored the number of subpass color attachments in nr_color_regions
-    * when calculating the key for caching.  Now that we've computed the bind
-    * map, we can reduce this to the actual max before we go into the back-end
-    * compiler.
-    */
-   stage->key.wm.nr_color_regions =
-      util_last_bit(stage->key.wm.color_outputs_valid);
 }
 
 static void
@@ -2368,10 +2320,12 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
                                  PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
 
          if (color_write_info) {
-            dynamic->color_writes = 0;
+            dynamic->color_writes = (1u << MAX_RTS) - 1;
             for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
-               dynamic->color_writes |=
-                  color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
+               if (color_write_info->pColorWriteEnables[i])
+                  dynamic->color_writes |= BITFIELD_BIT(i);
+               else
+                  dynamic->color_writes &= ~BITFIELD_BIT(i);
             }
          }
       }
index a78bc30..a1bccf9 100644 (file)
@@ -1434,10 +1434,18 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
          .SourceAlphaBlendFactor = vk_to_intel_blend[a->srcAlphaBlendFactor],
          .DestinationAlphaBlendFactor = vk_to_intel_blend[a->dstAlphaBlendFactor],
          .AlphaBlendFunction = vk_to_intel_blend_op[a->alphaBlendOp],
-         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
-         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
-         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
-         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
+         .WriteDisableAlpha =
+            (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 &&
+            !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
+         .WriteDisableRed =
+            (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 &&
+            !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
+         .WriteDisableGreen =
+             (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 &&
+             !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
+         .WriteDisableBlue =
+             (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 &&
+             !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
       };
 
       if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
@@ -2253,7 +2261,8 @@ has_color_buffer_write_enabled(const struct anv_graphics_pipeline *pipeline,
       if (binding->index == UINT32_MAX)
          continue;
 
-      if (blend && blend->pAttachments[binding->index].colorWriteMask != 0)
+      if (blend && binding->index < blend->attachmentCount &&
+          blend->pAttachments[binding->index].colorWriteMask != 0)
          return true;
    }
 
index 5060362..285d140 100644 (file)
@@ -283,14 +283,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
    if (anv_cmd_buffer_needs_dynamic_state(cmd_buffer,
                                           ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
                                           ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) {
-      const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
-
-      bool dirty_color_blend =
-         cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
-
-      bool dirty_primitive_topology =
-         cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
-
       VkPolygonMode dynamic_raster_mode;
       VkPrimitiveTopology primitive_topology =
          cmd_buffer->state.gfx.dynamic.primitive_topology;
@@ -298,21 +290,20 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
          genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
                                    primitive_topology);
 
-      if (dirty_color_blend || dirty_primitive_topology) {
-         uint32_t dwords[GENX(3DSTATE_WM_length)];
-         struct GENX(3DSTATE_WM) wm = {
-            GENX(3DSTATE_WM_header),
-
-            .ThreadDispatchEnable = pipeline->force_fragment_thread_dispatch ||
-                                    color_writes,
-            .MultisampleRasterizationMode =
-               genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode),
-         };
-         GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
-
-         anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
-      }
+      const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
+      uint32_t dwords[GENX(3DSTATE_WM_length)];
+      struct GENX(3DSTATE_WM) wm = {
+         GENX(3DSTATE_WM_header),
+
+         .ThreadDispatchEnable = pipeline->force_fragment_thread_dispatch ||
+                                 color_writes,
+         .MultisampleRasterizationMode =
+                                 genX(ms_rasterization_mode)(pipeline,
+                                                             dynamic_raster_mode),
+      };
+      GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
 
+      anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
    }
 
    if (anv_cmd_buffer_needs_dynamic_state(cmd_buffer,
@@ -329,14 +320,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
       bool dirty_color_blend =
          cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
 
-      /* Blend states of each RT */
-      uint32_t surface_count = 0;
-      struct anv_pipeline_bind_map *map;
-      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-         map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
-         surface_count = map->surface_count;
-      }
-
       uint32_t blend_dws[GENX(BLEND_STATE_length) +
                          MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
       uint32_t *dws = blend_dws;
@@ -348,10 +331,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
       bool dirty_logic_op =
          cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
 
-      for (uint32_t i = 0; i < surface_count; i++) {
-         struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
-         bool write_disabled =
-            dirty_color_blend && (color_writes & (1u << binding->index)) == 0;
+      for (uint32_t i = 0; i < MAX_RTS; i++) {
+         bool write_disabled = dirty_color_blend &&
+            (color_writes & BITFIELD_BIT(i)) == 0;
          struct GENX(BLEND_STATE_ENTRY) entry = {
             .WriteDisableAlpha = write_disabled,
             .WriteDisableRed   = write_disabled,
@@ -365,7 +347,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
       }
 
       uint32_t num_dwords = GENX(BLEND_STATE_length) +
-         GENX(BLEND_STATE_ENTRY_length) * surface_count;
+         GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
 
       struct anv_state blend_states =
          anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
index 154e192..5e16510 100644 (file)
@@ -642,43 +642,32 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
                                           ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
                                           ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP)) {
       const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
+
       /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders
        * threads.
        */
-      bool dirty_color_blend =
-         cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
-
-      if (dirty_color_blend) {
-         uint32_t dwords[MAX2(GENX(3DSTATE_WM_length),
-                              GENX(3DSTATE_PS_BLEND_length))];
-         struct GENX(3DSTATE_WM) wm = {
-            GENX(3DSTATE_WM_header),
+      uint32_t wm_dwords[GENX(3DSTATE_WM_length)];
+      struct GENX(3DSTATE_WM) wm = {
+         GENX(3DSTATE_WM_header),
 
-            .ForceThreadDispatchEnable = (pipeline->force_fragment_thread_dispatch ||
-                                          !color_writes) ? ForceON : 0,
-         };
-         GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
-
-         anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.wm);
+         .ForceThreadDispatchEnable = (pipeline->force_fragment_thread_dispatch ||
+                                       !color_writes) ? ForceON : 0,
+      };
+      GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm);
 
-         /* 3DSTATE_PS_BLEND to be consistent with the rest of the
-          * BLEND_STATE_ENTRY.
-          */
-         struct GENX(3DSTATE_PS_BLEND) ps_blend = {
-            GENX(3DSTATE_PS_BLEND_header),
-            .HasWriteableRT = color_writes != 0,
-         };
-         GENX(3DSTATE_PS_BLEND_pack)(NULL, dwords, &ps_blend);
-         anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.ps_blend);
-      }
+      anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm);
 
-      /* Blend states of each RT */
-      uint32_t surface_count = 0;
-      struct anv_pipeline_bind_map *map;
-      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-         map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
-         surface_count = map->surface_count;
-      }
+      /* 3DSTATE_PS_BLEND to be consistent with the rest of the
+       * BLEND_STATE_ENTRY.
+       */
+      uint32_t ps_blend_dwords[GENX(3DSTATE_PS_BLEND_length)];
+      struct GENX(3DSTATE_PS_BLEND) ps_blend = {
+         GENX(3DSTATE_PS_BLEND_header),
+         .HasWriteableRT = color_writes != 0,
+      };
+      GENX(3DSTATE_PS_BLEND_pack)(NULL, ps_blend_dwords, &ps_blend);
+      anv_batch_emit_merge(&cmd_buffer->batch, ps_blend_dwords,
+                           pipeline->gfx8.ps_blend);
 
       uint32_t blend_dws[GENX(BLEND_STATE_length) +
                          MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
@@ -691,10 +680,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
       bool dirty_logic_op =
          cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
 
-      for (uint32_t i = 0; i < surface_count; i++) {
-         struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
-         bool write_disabled =
-            dirty_color_blend && (color_writes & (1u << binding->index)) == 0;
+      for (uint32_t i = 0; i < MAX_RTS; i++) {
+         bool write_disabled = (color_writes & BITFIELD_BIT(i)) == 0;
          struct GENX(BLEND_STATE_ENTRY) entry = {
             .WriteDisableAlpha = write_disabled,
             .WriteDisableRed   = write_disabled,
@@ -708,7 +695,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
       }
 
       uint32_t num_dwords = GENX(BLEND_STATE_length) +
-         GENX(BLEND_STATE_ENTRY_length) * surface_count;
+         GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
 
       struct anv_state blend_states =
          anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,