anv: implement VK_KHR_fragment_shading_rate
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Mon, 19 Oct 2020 07:12:43 +0000 (10:12 +0300)
committer Marge Bot <eric+marge@anholt.net>
Sun, 2 May 2021 20:20:06 +0000 (20:20 +0000)
Available on Gen11+.

v2: Order shading rate in correct order (Samuel)

v3: Move CPS_STATE emission to genX_state.c

v4: Don't override various output structures (Jason)

v5: Rebase on top of master (Lionel)

v6: Fix invalid VkPhysicalDeviceFragmentShadingRatePropertiesKHR
    (min|max)FragmentShadingRateAttachmentTexelSize values (Ken)
    Drop #endif comment

v7: Limit extension to Gfx11+ (Lionel)
    Support conservative raster (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7455>

src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_genX.h
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_pipeline.c
src/intel/vulkan/genX_state.c
src/intel/vulkan/gfx8_cmd_buffer.c

index bb8aef5..e70c1cb 100644 (file)
@@ -199,6 +199,9 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest,
 
    ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
 
+   ANV_CMP_COPY(fragment_shading_rate.width, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
+   ANV_CMP_COPY(fragment_shading_rate.height, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
+
 #undef ANV_CMP_COPY
 
    return changed;
@@ -1433,3 +1436,14 @@ void anv_CmdSetColorWriteEnableEXT(
       cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
    }
 }
+
+/**
+ * Implements vkCmdSetFragmentShadingRateKHR.
+ *
+ * Copies the requested fragment size into the command buffer's dynamic state
+ * and raises ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE on the graphics dirty mask.
+ *
+ * @param commandBuffer  Command buffer being recorded.
+ * @param pFragmentSize  Fragment size to store; dereferenced unconditionally.
+ * @param combinerOps    Not read by this implementation.
+ */
+void anv_CmdSetFragmentShadingRateKHR(
+    VkCommandBuffer                             commandBuffer,
+    const VkExtent2D*                           pFragmentSize,
+    const VkFragmentShadingRateCombinerOpKHR    combinerOps[2])
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct anv_dynamic_state *dynamic = &cmd_buffer->state.gfx.dynamic;
+
+   dynamic->fragment_shading_rate.width = pFragmentSize->width;
+   dynamic->fragment_shading_rate.height = pFragmentSize->height;
+
+   /* Always flag the state, even when the value did not change. */
+   cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE;
+}
index 2c555a0..7a6ef1e 100644 (file)
@@ -211,6 +211,7 @@ get_device_extensions(const struct anv_physical_device *device,
       .KHR_external_memory_fd                = true,
       .KHR_external_semaphore                = true,
       .KHR_external_semaphore_fd             = true,
+      .KHR_fragment_shading_rate             = device->info.ver >= 11,
       .KHR_get_memory_requirements2          = true,
       .KHR_image_format_list                 = true,
       .KHR_imageless_framebuffer             = true,
@@ -1467,6 +1468,13 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
+         VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
+            (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
+         features->pipelineFragmentShadingRate = true;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
          VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
             (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
@@ -2210,6 +2218,49 @@ void anv_GetPhysicalDeviceProperties2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
+         VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
+            (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
+         if (pdevice->info.ver < 11) {
+            props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
+            props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
+            props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
+            props->primitiveFragmentShadingRateWithMultipleViewports = false;
+            props->layeredShadingRateAttachments = false;
+            props->fragmentShadingRateNonTrivialCombinerOps = true;
+            props->maxFragmentSize = (VkExtent2D) { 1, 1 };
+            props->maxFragmentSizeAspectRatio = 1;
+            props->maxFragmentShadingRateCoverageSamples = 0;
+            props->maxFragmentShadingRateRasterizationSamples = 0;
+            props->fragmentShadingRateWithShaderDepthStencilWrites = false;
+            props->fragmentShadingRateWithSampleMask = false;
+            props->fragmentShadingRateWithShaderSampleMask = false;
+            props->fragmentShadingRateWithConservativeRasterization = true;
+            props->fragmentShadingRateWithFragmentShaderInterlock = false;
+            props->fragmentShadingRateWithCustomSampleLocations = false;
+            props->fragmentShadingRateStrictMultiplyCombiner = false;
+         } else {
+            props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 1, 1 };
+            props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 4, 4 };
+            props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
+            props->primitiveFragmentShadingRateWithMultipleViewports = pdevice->info.ver >= 12;
+            props->layeredShadingRateAttachments = false;
+            props->fragmentShadingRateNonTrivialCombinerOps = true;
+            props->maxFragmentSize = (VkExtent2D) { 4, 4 };
+            props->maxFragmentSizeAspectRatio = 4;
+            props->maxFragmentShadingRateCoverageSamples = 4 * 4;
+            props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_16_BIT;
+            props->fragmentShadingRateWithShaderDepthStencilWrites = false;
+            props->fragmentShadingRateWithSampleMask = true;
+            props->fragmentShadingRateWithShaderSampleMask = false;
+            props->fragmentShadingRateWithConservativeRasterization = true;
+            props->fragmentShadingRateWithFragmentShaderInterlock = true;
+            props->fragmentShadingRateWithCustomSampleLocations = true;
+            props->fragmentShadingRateStrictMultiplyCombiner = false;
+         }
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
          VkPhysicalDeviceDriverPropertiesKHR *properties =
             (VkPhysicalDeviceDriverPropertiesKHR *) ext;
@@ -4737,3 +4788,36 @@ vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
    *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
    return VK_SUCCESS;
 }
+
+VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t*                                   pFragmentShadingRateCount,
+    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
+{
+   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
+   VK_OUTARRAY_MAKE(out, pFragmentShadingRates, pFragmentShadingRateCount);
+
+#define append_rate(_samples, _width, _height)                          \
+   do {                                                                 \
+      vk_outarray_append(&out, __r) {                                   \
+         __r->sampleCounts = _samples;                                  \
+         __r->fragmentSize = (VkExtent2D) {                             \
+            .width = _width,                                            \
+            .height = _height,                                          \
+         };                                                             \
+      }                                                                 \
+   } while (0)
+
+   VkSampleCountFlags sample_counts =
+      isl_device_get_sample_counts(&physical_device->isl_dev);
+
+   for (uint32_t x = 4; x >= 1; x /= 2) {
+       for (uint32_t y = 4; y >= 1; y /= 2) {
+         append_rate(sample_counts, x, y);
+      }
+   }
+
+#undef append_rate
+
+   return vk_outarray_status(&out);
+}
index 18d7cbc..7a4fe83 100644 (file)
@@ -109,6 +109,11 @@ void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
 void genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
                                const VkSampleLocationEXT *locations);
 
+void genX(emit_shading_rate)(struct anv_batch *batch,
+                             const struct anv_graphics_pipeline *pipeline,
+                             struct anv_state cps_states,
+                             struct anv_dynamic_state *dynamic_state);
+
 void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
                                 struct anv_address dst, struct anv_address src,
                                 uint32_t size);
index 6f9a1bb..5cb8900 100644 (file)
@@ -171,6 +171,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
          .vk_memory_model = true,
          .vk_memory_model_device_scope = true,
          .workgroup_memory_explicit_layout = true,
+         .fragment_shading_rate = pdevice->info.ver >= 11,
       },
       .ubo_addr_format =
          anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
@@ -322,6 +323,8 @@ void anv_DestroyPipeline(
 
       if (gfx_pipeline->blend_state.map)
          anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
+      if (gfx_pipeline->cps_state.map)
+         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);
 
       for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
          if (gfx_pipeline->shaders[s])
@@ -458,14 +461,42 @@ populate_gs_prog_key(const struct intel_device_info *devinfo,
    populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);
 }
 
+/**
+ * Decide whether a graphics pipeline may run with coarse pixel shading.
+ *
+ * @param pipeline  Pipeline being created; its sample_shading_enable and
+ *                  dynamic_states fields are read.
+ * @param fsr_info  Fragment shading rate create info, or NULL when the
+ *                  application did not chain one.
+ *
+ * @return false when sample shading is enabled, or when the shading rate is
+ *         static and either unspecified or pinned to 1x1 with both combiner
+ *         ops set to KEEP; true otherwise.
+ */
+static bool
+pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
+                          const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
+{
+   const bool rate_is_dynamic =
+      (pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) != 0;
+
+   if (pipeline->sample_shading_enable)
+      return false;
+
+   /* A dynamic rate can be changed at record time, so it may be coarse. */
+   if (rate_is_dynamic)
+      return true;
+
+   /* Static rate that was never specified for the pipeline. */
+   if (fsr_info == NULL)
+      return false;
+
+   /* Static 1x1 rate with KEEP combiners: nothing in the pipeline can
+    * change the value, so coarse pixel shading can never kick in.
+    */
+   const bool fixed_at_1x1 =
+      fsr_info->fragmentSize.width <= 1 &&
+      fsr_info->fragmentSize.height <= 1 &&
+      fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
+      fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
+
+   return !fixed_at_1x1;
+}
+
 static void
-populate_wm_prog_key(const struct intel_device_info *devinfo,
+populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                      VkPipelineShaderStageCreateFlags flags,
                      bool robust_buffer_acccess,
                      const struct anv_subpass *subpass,
                      const VkPipelineMultisampleStateCreateInfo *ms_info,
+                     const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
                      struct brw_wm_prog_key *key)
 {
+   const struct anv_device *device = pipeline->base.device;
+   const struct intel_device_info *devinfo = &device->info;
+
    memset(key, 0, sizeof(*key));
 
    populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);
@@ -513,6 +544,10 @@ populate_wm_prog_key(const struct intel_device_info *devinfo,
 
       key->frag_coord_adds_sample_pos = key->persample_interp;
    }
+
+   key->coarse_pixel =
+      device->vk.enabled_extensions.KHR_fragment_shading_rate &&
+      pipeline_has_coarse_pixel(pipeline, fsr_info);
 }
 
 static void
@@ -1304,10 +1339,12 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
       case MESA_SHADER_FRAGMENT: {
          const bool raster_enabled =
             !info->pRasterizationState->rasterizerDiscardEnable;
-         populate_wm_prog_key(devinfo, sinfo->flags,
+         populate_wm_prog_key(pipeline, sinfo->flags,
                               pipeline->base.device->robust_buffer_access,
                               pipeline->subpass,
                               raster_enabled ? info->pMultisampleState : NULL,
+                              vk_find_struct_const(info->pNext,
+                                                   PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
                               &stages[stage].key.wm);
          break;
       }
@@ -1438,6 +1475,24 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
          goto fail;
       }
 
+      /* This is rather ugly.
+       *
+       * Any variable annotated as interpolated by sample essentially disables
+       * coarse pixel shading. Unfortunately the CTS tests exercising this set
+       * the varying value in the previous stage using a constant. Our NIR
+       * infrastructure is clever enough to lookup variables across stages and
+       * constant fold, removing the variable. So in order to comply with CTS
+       * we have to check variables here.
+       */
+      if (s == MESA_SHADER_FRAGMENT) {
+         nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
+            if (var->data.sample) {
+               stages[s].key.wm.coarse_pixel = false;
+               break;
+            }
+         }
+      }
+
       stages[s].feedback.duration += os_time_get_nano() - stage_start;
    }
 
@@ -1861,14 +1916,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
 
    pipeline->dynamic_state = default_dynamic_state;
 
-   if (pCreateInfo->pDynamicState) {
-      /* Remove all of the states that are marked as dynamic */
-      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
-      for (uint32_t s = 0; s < count; s++) {
-         states &= ~anv_cmd_dirty_bit_for_vk_dynamic_state(
-            pCreateInfo->pDynamicState->pDynamicStates[s]);
-      }
-   }
+   states &= ~pipeline->dynamic_states;
 
    struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
 
@@ -2100,14 +2148,22 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
       }
    }
 
+   const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
+      vk_find_struct_const(pCreateInfo->pNext,
+                           PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
+   if (fsr_state) {
+      if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
+         dynamic->fragment_shading_rate = fsr_state->fragmentSize;
+   }
+
    pipeline->dynamic_state_mask = states;
 
-   /* For now that only state that can be either dynamic or baked in the
-    * pipeline is the sample location & color blend.
+   /* Mark states that can either be dynamic or fully baked into the pipeline.
     */
    pipeline->static_state_mask = states &
       (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
-       ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
+       ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
+       ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
 }
 
 static void
@@ -2210,7 +2266,17 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
 
    assert(pCreateInfo->pRasterizationState);
 
+   pipeline->dynamic_states = 0;
+   if (pCreateInfo->pDynamicState) {
+      /* Collect all of the states that are marked as dynamic */
+      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+      for (uint32_t s = 0; s < count; s++) {
+         pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
+            pCreateInfo->pDynamicState->pDynamicStates[s]);
+      }
+   }
    copy_non_dynamic_state(pipeline, pCreateInfo);
+
    pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
 
    /* Previously we enabled depth clipping when !depthClampEnable.
index 7c262f4..b6289b7 100644 (file)
@@ -2240,6 +2240,7 @@ enum anv_cmd_dirty_bits {
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP                  = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
    ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS            = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
    ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE           = 1 << 25, /* VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT */
+   ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE                = 1 << 26, /* VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR */
 };
 typedef uint32_t anv_cmd_dirty_mask_t;
 
@@ -2265,7 +2266,8 @@ typedef uint32_t anv_cmd_dirty_mask_t;
     ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |         \
     ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP |                  \
     ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |            \
-    ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE)
+    ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |           \
+    ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
 
 static inline enum anv_cmd_dirty_bits
 anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
@@ -2317,6 +2319,8 @@ anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
       return ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
    case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
       return ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
+   case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
+      return ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE;
    default:
       assert(!"Unsupported dynamic state");
       return 0;
@@ -2646,6 +2650,8 @@ struct anv_dynamic_state {
       VkSampleLocationEXT                       locations[MAX_SAMPLE_LOCATIONS];
    } sample_locations;
 
+   VkExtent2D                                   fragment_shading_rate;
+
    VkCullModeFlags                              cull_mode;
    VkFrontFace                                  front_face;
    VkPrimitiveTopology                          primitive_topology;
@@ -3358,6 +3364,9 @@ struct anv_graphics_pipeline {
 
    struct anv_dynamic_state                     dynamic_state;
 
+   /* States declared dynamic at pipeline creation. */
+   anv_cmd_dirty_mask_t                         dynamic_states;
+
    uint32_t                                     topology;
 
    struct anv_subpass *                         subpass;
@@ -3385,6 +3394,8 @@ struct anv_graphics_pipeline {
 
    struct anv_state                             blend_state;
 
+   struct anv_state                             cps_state;
+
    uint32_t                                     vb_used;
    struct anv_pipeline_vertex_binding {
       uint32_t                                  stride;
index 9e05d52..b6a4cea 100644 (file)
@@ -374,8 +374,12 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
 
       assert(0 <= input_index);
 
-      /* gl_Viewport and gl_Layer are stored in the VUE header */
-      if (attr == VARYING_SLOT_VIEWPORT || attr == VARYING_SLOT_LAYER) {
+      /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
+       * VUE header
+       */
+      if (attr == VARYING_SLOT_VIEWPORT ||
+          attr == VARYING_SLOT_LAYER ||
+          attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
          continue;
       }
 
@@ -828,6 +832,25 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
    anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
       sm.SampleMask = sample_mask;
    }
+
+   pipeline->cps_state = ANV_STATE_NULL;
+#if GFX_VER >= 11
+   if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) &&
+       pipeline->base.device->vk.enabled_extensions.KHR_fragment_shading_rate) {
+#if GFX_VER >= 12
+      struct anv_device *device = pipeline->base.device;
+      const uint32_t num_dwords =
+         GENX(CPS_STATE_length) * 4 * pipeline->dynamic_state.viewport.count;
+      pipeline->cps_state =
+         anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords, 32);
+#endif
+
+      genX(emit_shading_rate)(&pipeline->base.batch,
+                              pipeline,
+                              pipeline->cps_state,
+                              &pipeline->dynamic_state);
+   }
+#endif
 }
 
 static const uint32_t vk_to_intel_logic_op[] = {
@@ -1571,12 +1594,16 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
 
          int varying = output->location;
          uint8_t component_mask = output->component_mask;
-         /* VARYING_SLOT_PSIZ contains three scalar fields packed together:
-          * - VARYING_SLOT_LAYER    in VARYING_SLOT_PSIZ.y
-          * - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z
-          * - VARYING_SLOT_PSIZ     in VARYING_SLOT_PSIZ.w
+         /* VARYING_SLOT_PSIZ contains four scalar fields packed together:
+          * - VARYING_SLOT_PRIMITIVE_SHADING_RATE in VARYING_SLOT_PSIZ.x
+          * - VARYING_SLOT_LAYER                  in VARYING_SLOT_PSIZ.y
+          * - VARYING_SLOT_VIEWPORT               in VARYING_SLOT_PSIZ.z
+          * - VARYING_SLOT_PSIZ                   in VARYING_SLOT_PSIZ.w
           */
-         if (varying == VARYING_SLOT_LAYER) {
+         if (varying == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
+            varying = VARYING_SLOT_PSIZ;
+            component_mask = 1 << 0; // SO_DECL_COMPMASK_X
+         } else if (varying == VARYING_SLOT_LAYER) {
             varying = VARYING_SLOT_PSIZ;
             component_mask = 1 << 1; // SO_DECL_COMPMASK_Y
          } else if (varying == VARYING_SLOT_VIEWPORT) {
@@ -2250,6 +2277,8 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
       assert(!wm_prog_data->inner_coverage); /* Not available in SPIR-V */
       if (!wm_prog_data->uses_sample_mask)
          ps.InputCoverageMaskState = ICMS_NONE;
+      else if (wm_prog_data->per_coarse_pixel_dispatch)
+         ps.InputCoverageMaskState  = ICMS_NORMAL;
       else if (wm_prog_data->post_depth_coverage)
          ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
       else
@@ -2257,6 +2286,12 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
 #else
       ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
 #endif
+
+#if GFX_VER >= 11
+      ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
+         wm_prog_data->uses_depth_w_coefficients;
+      ps.PixelShaderIsPerCoarsePixel = wm_prog_data->per_coarse_pixel_dispatch;
+#endif
    }
 }
 
index fd91f9b..d677f22 100644 (file)
@@ -576,6 +576,47 @@ genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
 }
 #endif
 
+#if GFX_VER >= 11
+void
+genX(emit_shading_rate)(struct anv_batch *batch,
+                        const struct anv_graphics_pipeline *pipeline,
+                        struct anv_state cps_states,
+                        struct anv_dynamic_state *dynamic_state)
+{
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+   const bool cps_enable = wm_prog_data && wm_prog_data->per_coarse_pixel_dispatch;
+
+#if GFX_VER == 11
+   anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
+      cps.CoarsePixelShadingMode = cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE;
+      if (cps_enable) {
+         cps.MinCPSizeX = dynamic_state->fragment_shading_rate.width;
+         cps.MinCPSizeY = dynamic_state->fragment_shading_rate.height;
+      }
+   }
+#elif GFX_VER == 12
+   for (uint32_t i = 0; i < dynamic_state->viewport.count; i++) {
+      uint32_t *cps_state_dwords =
+         cps_states.map + GENX(CPS_STATE_length) * 4 * i;
+      struct GENX(CPS_STATE) cps_state = {
+         .CoarsePixelShadingMode = cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE,
+      };
+
+      if (cps_enable) {
+         cps_state.MinCPSizeX = dynamic_state->fragment_shading_rate.width;
+         cps_state.MinCPSizeY = dynamic_state->fragment_shading_rate.height;
+      }
+
+      GENX(CPS_STATE_pack)(NULL, cps_state_dwords, &cps_state);
+   }
+
+   anv_batch_emit(batch, GENX(3DSTATE_CPS_POINTERS), cps) {
+      cps.CoarsePixelShadingStateArrayPointer = cps_states.offset;
+   }
+#endif
+}
+#endif /* GFX_VER >= 11 */
+
 static uint32_t
 vk_to_intel_tex_filter(VkFilter filter, bool anisotropyEnable)
 {
@@ -771,6 +812,10 @@ VkResult genX(CreateSampler)(
          .SamplerDisable = false,
          .TextureBorderColorMode = DX10OGL,
 
+#if GFX_VER >= 11
+         .CPSLODCompensationEnable = true,
+#endif
+
 #if GFX_VER >= 8
          .LODPreClampMode = CLAMP_MODE_OGL,
 #else
index b750dea..67a8a2e 100644 (file)
@@ -722,6 +722,23 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
       }
    }
 
+#if GFX_VER >= 11
+   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) {
+      struct anv_state cps_states = ANV_STATE_NULL;
+
+#if GFX_VER >= 12
+      uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;
+      cps_states =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+                                            GENX(CPS_STATE_length) * 4 * count,
+                                            32);
+#endif /* GFX_VER >= 12 */
+
+      genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, cps_states,
+                              &cmd_buffer->state.gfx.dynamic);
+   }
+#endif /* GFX_VER >= 11 */
+
    cmd_buffer->state.gfx.dirty = 0;
 }