radv: add support for dynamic color blend enable
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 7 Nov 2022 12:21:22 +0000 (13:21 +0100)
committer Marge Bot <emma+marge@anholt.net>
Fri, 18 Nov 2022 15:29:33 +0000 (15:29 +0000)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19579>

src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h

index c45ea64..f6478f3 100644 (file)
@@ -134,6 +134,7 @@ const struct radv_dynamic_state default_dynamic_state = {
    .provoking_vertex_mode = VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT,
    .depth_clamp_enable = 0u,
    .color_write_mask = 0u,
+   .color_blend_enable = 0u,
 };
 
 static void
@@ -291,6 +292,8 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy
 
    RADV_CMP_COPY(color_write_mask, RADV_DYNAMIC_COLOR_WRITE_MASK);
 
+   RADV_CMP_COPY(color_blend_enable, RADV_DYNAMIC_COLOR_BLEND_ENABLE);
+
 #undef RADV_CMP_COPY
 
    cmd_buffer->state.dirty |= dest_mask;
@@ -1912,6 +1915,13 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
        cmd_buffer->state.emitted_graphics_pipeline->ms.db_eqaa != pipeline->ms.db_eqaa)
       cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE;
 
+   if (!cmd_buffer->state.emitted_graphics_pipeline ||
+       memcmp(cmd_buffer->state.emitted_graphics_pipeline->cb_blend_control,
+              pipeline->cb_blend_control, sizeof(pipeline->cb_blend_control)) ||
+       memcmp(cmd_buffer->state.emitted_graphics_pipeline->sx_mrt_blend_opt,
+              pipeline->sx_mrt_blend_opt, sizeof(pipeline->sx_mrt_blend_opt)))
+      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE;
+
    radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw);
 
    if (pipeline->has_ngg_culling &&
@@ -3916,6 +3926,43 @@ radv_emit_sample_mask(struct radv_cmd_buffer *cmd_buffer)
 }
 
+/**
+ * Emit the per-render-target blend registers for the current dynamic color
+ * blend enable state.
+ *
+ * The values precomputed in the bound graphics pipeline (cb_blend_control and
+ * sx_mrt_blend_opt) are copied into local arrays and patched per render target
+ * according to cmd_buffer->state.dynamic.color_blend_enable. MAX_RTS dwords
+ * are then written starting at CB_BLEND0_CONTROL and, when the device reports
+ * RB+ support, starting at SX_MRT0_BLEND_OPT.
+ */
 static void
+radv_emit_color_blend_enable(struct radv_cmd_buffer *cmd_buffer)
+{
+   const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
+   const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
+   const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+   unsigned cb_blend_control[MAX_RTS], sx_mrt_blend_opt[MAX_RTS];
+
+   for (unsigned i = 0; i < MAX_RTS; i++) {
+      /* Each render target owns a 4-bit group in the mask; any set bit enables blending. */
+      bool blend_enable = (d->color_blend_enable >> (i * 4)) & 0xf;
+
+      /* Work on copies so the values stored in the pipeline stay untouched. */
+      cb_blend_control[i] = pipeline->cb_blend_control[i];
+      sx_mrt_blend_opt[i] = pipeline->sx_mrt_blend_opt[i];
+
+      if (blend_enable) {
+         cb_blend_control[i] |= S_028780_ENABLE(1);
+      } else if (pdevice->rad_info.has_rbplus) {
+         /* Make sure to keep RB+ blend optimizations disabled for dual source blending. */
+         /* NOTE(review): radv_pipeline_init_blend_state appears to store OPT_COMB_NONE in
+          * every entry when MRT0 uses dual source blending, which is why entries holding
+          * that value are left alone here - confirm.
+          */
+         if (G_028760_COLOR_COMB_FCN(sx_mrt_blend_opt[i]) != V_028760_OPT_COMB_NONE &&
+             G_028760_ALPHA_COMB_FCN(sx_mrt_blend_opt[i]) != V_028760_OPT_COMB_NONE) {
+            /* Blending is off for this target: replace both combiner functions with
+             * BLEND_DISABLED while keeping the other fields of the word.
+             */
+            sx_mrt_blend_opt[i] &= C_028760_COLOR_COMB_FCN;
+            sx_mrt_blend_opt[i] &= C_028760_ALPHA_COMB_FCN;
+            sx_mrt_blend_opt[i] |= S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+                                   S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+         }
+      }
+   }
+
+   radeon_set_context_reg_seq(cmd_buffer->cs, R_028780_CB_BLEND0_CONTROL, MAX_RTS);
+   radeon_emit_array(cmd_buffer->cs, cb_blend_control, MAX_RTS);
+
+   /* The SX_MRT*_BLEND_OPT registers are only written on devices with RB+. */
+   if (pdevice->rad_info.has_rbplus) {
+      radeon_set_context_reg_seq(cmd_buffer->cs, R_028760_SX_MRT0_BLEND_OPT, MAX_RTS);
+      radeon_emit_array(cmd_buffer->cs, sx_mrt_blend_opt, MAX_RTS);
+   }
+}
+
+static void
 radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
 {
    uint64_t states =
@@ -4020,6 +4067,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, bool pip
    if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE)
       radv_emit_depth_clamp_enable(cmd_buffer);
 
+   if (states & RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE)
+      radv_emit_color_blend_enable(cmd_buffer);
+
    cmd_buffer->state.dirty &= ~states;
 }
 
@@ -6609,6 +6659,27 @@ radv_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t firstAttach
 }
 
+/**
+ * vkCmdSetColorBlendEnableEXT: update the dynamic blend enable of the color
+ * attachments in [firstAttachment, firstAttachment + attachmentCount).
+ *
+ * Each attachment is tracked as a 4-bit group in dynamic.color_blend_enable,
+ * with all four bits set when blending is enabled. Attachments outside the
+ * given range keep their previously recorded state. The color blend enable
+ * dynamic state is always flagged dirty.
+ */
 VKAPI_ATTR void VKAPI_CALL
+radv_CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment,
+                               uint32_t attachmentCount, const VkBool32* pColorBlendEnables)
+{
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct radv_cmd_state *state = &cmd_buffer->state;
+   /* Start from the current mask so that only the requested range is modified. */
+   uint32_t color_blend_enable = state->dynamic.color_blend_enable;
+
+   assert(firstAttachment + attachmentCount <= MAX_RTS);
+
+   for (uint32_t i = 0; i < attachmentCount; i++) {
+      const uint32_t rt_mask = 0xfu << ((firstAttachment + i) * 4);
+
+      if (pColorBlendEnables[i])
+         color_blend_enable |= rt_mask;
+      else
+         color_blend_enable &= ~rt_mask;
+   }
+
+   state->dynamic.color_blend_enable = color_blend_enable;
+
+   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE;
+}
+
+VKAPI_ATTR void VKAPI_CALL
 radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
                         const VkCommandBuffer *pCmdBuffers)
 {
index 86b5c77..5f6e8e9 100644 (file)
@@ -55,8 +55,6 @@ struct radv_blend_state {
 
    uint32_t cb_target_mask;
    uint32_t cb_target_enabled_4bit;
-   uint32_t sx_mrt_blend_opt[8];
-   uint32_t cb_blend_control[8];
 
    uint32_t spi_shader_col_format;
    uint32_t col_format_is_int8;
@@ -575,7 +573,11 @@ radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pip
            !(blend->cb_target_mask & (0xfu << (i * 4))))) {
          cf = V_028714_SPI_SHADER_ZERO;
       } else {
-         bool blend_enable = blend->blend_enable_4bit & (0xfu << (i * 4));
+         /* Assume blend is enabled when the state is dynamic. This might select a suboptimal format
+          * in some situations but changing color export formats dynamically is hard.
+          */
+         bool blend_enable = (pipeline->dynamic_states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) ||
+                             blend->blend_enable_4bit & (0xfu << (i * 4));
 
          cf = radv_choose_spi_color_format(pipeline->base.device, fmt, blend_enable,
                                            blend->need_src_alpha & (1 << i));
@@ -710,8 +712,6 @@ radv_can_enable_dual_src(const struct vk_color_blend_attachment_state *att)
    bool eqRGB_minmax = eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX;
    bool eqA_minmax = eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX;
 
-   assert(att->blend_enable);
-
    if (!eqRGB_minmax && (is_dual_src(srcRGB) || is_dual_src(dstRGB)))
       return true;
    if (!eqA_minmax && (is_dual_src(srcA) || is_dual_src(dstA)))
@@ -751,9 +751,6 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
          VkBlendFactor srcA = state->cb->attachments[i].src_alpha_blend_factor;
          VkBlendFactor dstA = state->cb->attachments[i].dst_alpha_blend_factor;
 
-         blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
-                                     S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
-
          if (!(pipeline->dynamic_states & RADV_DYNAMIC_COLOR_WRITE_MASK) &&
              !state->cb->attachments[i].write_mask)
             continue;
@@ -766,8 +763,9 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
 
          blend.cb_target_mask |= (unsigned)state->cb->attachments[i].write_mask << (4 * i);
          blend.cb_target_enabled_4bit |= 0xfu << (4 * i);
-         if (!state->cb->attachments[i].blend_enable) {
-            blend.cb_blend_control[i] = blend_cntl;
+         if (!(pipeline->dynamic_states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) &&
+             !state->cb->attachments[i].blend_enable) {
+            pipeline->cb_blend_control[i] = blend_cntl;
             continue;
          }
 
@@ -820,12 +818,11 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
             dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
 
          /* Set the final value. */
-         blend.sx_mrt_blend_opt[i] =
+         pipeline->sx_mrt_blend_opt[i] =
             S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) |
             S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
             S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) |
             S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
-         blend_cntl |= S_028780_ENABLE(1);
 
          blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
          blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(gfx_level, srcRGB));
@@ -836,7 +833,7 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
             blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(gfx_level, srcA));
             blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(gfx_level, dstA));
          }
-         blend.cb_blend_control[i] = blend_cntl;
+         pipeline->cb_blend_control[i] = blend_cntl;
 
          blend.blend_enable_4bit |= 0xfu << (i * 4);
 
@@ -847,19 +844,14 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
              dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
             blend.need_src_alpha |= 1 << i;
       }
-      for (i = state->cb->attachment_count; i < 8; i++) {
-         blend.cb_blend_control[i] = 0;
-         blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
-                                     S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
-      }
    }
 
    if (device->physical_device->rad_info.has_rbplus) {
       /* Disable RB+ blend optimizations for dual source blending. */
       if (blend.mrt0_is_dual_src) {
          for (i = 0; i < 8; i++) {
-            blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
-                                        S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
+            pipeline->sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
+                                            S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
          }
       }
 
@@ -993,7 +985,7 @@ radv_pipeline_out_of_order_rast(struct radv_graphics_pipeline *pipeline,
       return false;
 
    /* Be conservative if a logic operation is enabled with color buffers. */
-   if (colormask &&
+   if (colormask && (pipeline->dynamic_states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) &&
        ((pipeline->dynamic_states & RADV_DYNAMIC_LOGIC_OP_ENABLE) || state->cb->logic_op_enable))
       return false;
 
@@ -1337,6 +1329,8 @@ radv_dynamic_state_mask(VkDynamicState state)
       return RADV_DYNAMIC_DEPTH_CLAMP_ENABLE;
    case VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT:
       return RADV_DYNAMIC_COLOR_WRITE_MASK;
+   case VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT:
+      return RADV_DYNAMIC_COLOR_BLEND_ENABLE;
    default:
       unreachable("Unhandled dynamic state");
    }
@@ -1347,9 +1341,12 @@ radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline,
                                const struct vk_color_blend_state *cb)
 {
    if (cb) {
+      if (pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_WRITE_MASK |
+                                      RADV_DYNAMIC_COLOR_BLEND_ENABLE))
+         return true;
+
       for (uint32_t i = 0; i < cb->attachment_count; i++) {
-         if (((pipeline->dynamic_states & RADV_DYNAMIC_COLOR_WRITE_MASK) ||
-              cb->attachments[i].write_mask) && cb->attachments[i].blend_enable)
+         if (cb->attachments[i].write_mask && cb->attachments[i].blend_enable)
             return true;
       }
    }
@@ -1924,6 +1921,15 @@ radv_pipeline_init_dynamic_state(struct radv_graphics_pipeline *pipeline,
       }
    }
 
+   if (radv_pipeline_has_color_attachments(state->rp) && states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) {
+      for (unsigned i = 0; i < state->cb->attachment_count; i++) {
+         if (!state->cb->attachments[i].blend_enable)
+            continue;
+
+         dynamic->color_blend_enable |= 0xfu << (i * 4);
+      }
+   }
+
    pipeline->dynamic_state.mask = states;
 }
 
@@ -4360,17 +4366,6 @@ radv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs,
                                const struct radv_graphics_pipeline *pipeline,
                                const struct radv_blend_state *blend)
 {
-   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
-
-   radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8);
-   radeon_emit_array(ctx_cs, blend->cb_blend_control, 8);
-
-   if (pdevice->rad_info.has_rbplus) {
-
-      radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8);
-      radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8);
-   }
-
    radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
 
    radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
index d3bd53a..d7c4599 100644 (file)
@@ -1121,7 +1121,8 @@ enum radv_dynamic_state_bits {
    RADV_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39,
    RADV_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40,
    RADV_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41,
-   RADV_DYNAMIC_ALL = (1ull << 42) - 1,
+   RADV_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42,
+   RADV_DYNAMIC_ALL = (1ull << 43) - 1,
 };
 
 enum radv_cmd_dirty_bits {
@@ -1169,13 +1170,14 @@ enum radv_cmd_dirty_bits {
    RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39,
    RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40,
    RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41,
-   RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 42) - 1,
-   RADV_CMD_DIRTY_PIPELINE = 1ull << 42,
-   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 43,
-   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 44,
-   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 45,
-   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 46,
-   RADV_CMD_DIRTY_GUARDBAND = 1ull << 47,
+   RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42,
+   RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 43) - 1,
+   RADV_CMD_DIRTY_PIPELINE = 1ull << 43,
+   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 44,
+   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 45,
+   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 46,
+   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 47,
+   RADV_CMD_DIRTY_GUARDBAND = 1ull << 48,
 };
 
 enum radv_cmd_flush_bits {
@@ -1397,6 +1399,8 @@ struct radv_dynamic_state {
    bool depth_clamp_enable;
 
    uint32_t color_write_mask;
+
+   uint32_t color_blend_enable;
 };
 
 extern const struct radv_dynamic_state default_dynamic_state;
@@ -2075,6 +2079,8 @@ struct radv_graphics_pipeline {
    uint8_t vtx_emit_num;
    uint64_t needed_dynamic_state;
    unsigned cb_color_control;
+   unsigned cb_blend_control[MAX_RTS];
+   unsigned sx_mrt_blend_opt[MAX_RTS];
    uint32_t binding_stride[MAX_VBS];
    uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
    uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];