Available on Gen11+.
v2: Order shading rate in correct order (Samuel)
v3: Move CPS_STATE emission to genX_state.c
v4: Don't override various output structures (Jason)
v5: Rebase on top of master (Lionel)
v6: Fix invalid VkPhysicalDeviceFragmentShadingRatePropertiesKHR
(min|max)FragmentShadingRateAttachmentTexelSize values (Ken)
Drop #endif comment
v7: Limit extension to Gfx11+ (Lionel)
Support conservative raster (Lionel)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7455>
ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
+ ANV_CMP_COPY(fragment_shading_rate.width, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
+ ANV_CMP_COPY(fragment_shading_rate.height, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
+
#undef ANV_CMP_COPY
return changed;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
}
}
+
+/* vkCmdSetFragmentShadingRateKHR entry point.
+ *
+ * Copies the requested fragment size into the command buffer's dynamic
+ * state and flags the shading rate as dirty. combinerOps is not read here:
+ * only the fragment size is recorded.
+ */
+void anv_CmdSetFragmentShadingRateKHR(
+    VkCommandBuffer                             commandBuffer,
+    const VkExtent2D*                           pFragmentSize,
+    const VkFragmentShadingRateCombinerOpKHR    combinerOps[2])
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   VkExtent2D *rate = &cmd_buffer->state.gfx.dynamic.fragment_shading_rate;
+
+   rate->width = pFragmentSize->width;
+   rate->height = pFragmentSize->height;
+
+   cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE;
+}
.KHR_external_memory_fd = true,
.KHR_external_semaphore = true,
.KHR_external_semaphore_fd = true,
+ .KHR_fragment_shading_rate = device->info.ver >= 11,
.KHR_get_memory_requirements2 = true,
.KHR_image_format_list = true,
.KHR_imageless_framebuffer = true,
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
+ VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
+ (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
+ features->pipelineFragmentShadingRate = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
(VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
+ VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
+ (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
+ if (pdevice->info.ver < 11) {
+ props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
+ props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
+ props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
+ props->primitiveFragmentShadingRateWithMultipleViewports = false;
+ props->layeredShadingRateAttachments = false;
+ props->fragmentShadingRateNonTrivialCombinerOps = true;
+ props->maxFragmentSize = (VkExtent2D) { 1, 1 };
+ props->maxFragmentSizeAspectRatio = 1;
+ props->maxFragmentShadingRateCoverageSamples = 0;
+ props->maxFragmentShadingRateRasterizationSamples = 0;
+ props->fragmentShadingRateWithShaderDepthStencilWrites = false;
+ props->fragmentShadingRateWithSampleMask = false;
+ props->fragmentShadingRateWithShaderSampleMask = false;
+ props->fragmentShadingRateWithConservativeRasterization = true;
+ props->fragmentShadingRateWithFragmentShaderInterlock = false;
+ props->fragmentShadingRateWithCustomSampleLocations = false;
+ props->fragmentShadingRateStrictMultiplyCombiner = false;
+ } else {
+ props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 1, 1 };
+ props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 4, 4 };
+ props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
+ props->primitiveFragmentShadingRateWithMultipleViewports = pdevice->info.ver >= 12;
+ props->layeredShadingRateAttachments = false;
+ props->fragmentShadingRateNonTrivialCombinerOps = true;
+ props->maxFragmentSize = (VkExtent2D) { 4, 4 };
+ props->maxFragmentSizeAspectRatio = 4;
+ props->maxFragmentShadingRateCoverageSamples = 4 * 4;
+ props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_16_BIT;
+ props->fragmentShadingRateWithShaderDepthStencilWrites = false;
+ props->fragmentShadingRateWithSampleMask = true;
+ props->fragmentShadingRateWithShaderSampleMask = false;
+ props->fragmentShadingRateWithConservativeRasterization = true;
+ props->fragmentShadingRateWithFragmentShaderInterlock = true;
+ props->fragmentShadingRateWithCustomSampleLocations = true;
+ props->fragmentShadingRateStrictMultiplyCombiner = false;
+ }
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
VkPhysicalDeviceDriverPropertiesKHR *properties =
(VkPhysicalDeviceDriverPropertiesKHR *) ext;
*pSupportedVersion = MIN2(*pSupportedVersion, 4u);
return VK_SUCCESS;
}
+
+/* vkGetPhysicalDeviceFragmentShadingRatesKHR entry point.
+ *
+ * Enumerates every power-of-two fragment size from 4x4 down to 1x1, ordered
+ * from the largest width to the smallest and, for a given width, from the
+ * largest height to the smallest.
+ *
+ * Every entry advertises the sample counts reported by
+ * isl_device_get_sample_counts(), except the {1, 1} entry: the Vulkan
+ * specification requires that entry to report sampleCounts == ~0.
+ *
+ * The count/array handling is delegated to the vk_outarray helpers, whose
+ * status is returned as-is.
+ */
+VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t*                                   pFragmentShadingRateCount,
+    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
+{
+   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
+   VK_OUTARRAY_MAKE(out, pFragmentShadingRates, pFragmentShadingRateCount);
+
+   const VkSampleCountFlags sample_counts =
+      isl_device_get_sample_counts(&physical_device->isl_dev);
+
+   /* x and y take the values 4, 2, 1; the division then yields 0, which
+    * terminates each loop.
+    */
+   for (uint32_t x = 4; x >= 1; x /= 2) {
+      for (uint32_t y = 4; y >= 1; y /= 2) {
+         /* For size {1, 1}, the sample count must be ~0. */
+         const VkSampleCountFlags rate_samples =
+            (x == 1 && y == 1) ? ~0u : sample_counts;
+
+         vk_outarray_append(&out, rate) {
+            rate->sampleCounts = rate_samples;
+            rate->fragmentSize = (VkExtent2D) {
+               .width = x,
+               .height = y,
+            };
+         }
+      }
+   }
+
+   return vk_outarray_status(&out);
+}
void genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
const VkSampleLocationEXT *locations);
+void genX(emit_shading_rate)(struct anv_batch *batch,
+ const struct anv_graphics_pipeline *pipeline,
+ struct anv_state cps_states,
+ struct anv_dynamic_state *dynamic_state);
+
void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
uint32_t size);
.vk_memory_model = true,
.vk_memory_model_device_scope = true,
.workgroup_memory_explicit_layout = true,
+ .fragment_shading_rate = pdevice->info.ver >= 11,
},
.ubo_addr_format =
anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
if (gfx_pipeline->blend_state.map)
anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
+ if (gfx_pipeline->cps_state.map)
+ anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
if (gfx_pipeline->shaders[s])
populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);
}
+/* Tells whether the pipeline may end up running with a fragment shading
+ * rate other than 1x1.
+ *
+ * fsr_info is the VkPipelineFragmentShadingRateStateCreateInfoKHR passed by
+ * the caller for this pipeline, or NULL when none was supplied.
+ */
+static bool
+pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
+                          const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
+{
+   const bool rate_is_dynamic =
+      (pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) != 0;
+
+   /* Sample shading rules out coarse pixels regardless of anything else. */
+   if (pipeline->sample_shading_enable)
+      return false;
+
+   /* The rate is set at record time, so any value has to be expected. */
+   if (rate_is_dynamic)
+      return true;
+
+   /* Static state, and the pipeline does not specify a rate at all. */
+   if (fsr_info == NULL)
+      return false;
+
+   /* Static 1x1 rate whose combiners both keep it: nothing in the pipeline
+    * is able to change the value.
+    */
+   const bool size_is_1x1 =
+      fsr_info->fragmentSize.width <= 1 &&
+      fsr_info->fragmentSize.height <= 1;
+   const bool combiners_keep =
+      fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
+      fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
+
+   return !(size_is_1x1 && combiners_keep);
+}
+
+
static void
-populate_wm_prog_key(const struct intel_device_info *devinfo,
+populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
VkPipelineShaderStageCreateFlags flags,
bool robust_buffer_acccess,
const struct anv_subpass *subpass,
const VkPipelineMultisampleStateCreateInfo *ms_info,
+ const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
struct brw_wm_prog_key *key)
{
+ const struct anv_device *device = pipeline->base.device;
+ const struct intel_device_info *devinfo = &device->info;
+
memset(key, 0, sizeof(*key));
populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);
key->frag_coord_adds_sample_pos = key->persample_interp;
}
+
+ key->coarse_pixel =
+ device->vk.enabled_extensions.KHR_fragment_shading_rate &&
+ pipeline_has_coarse_pixel(pipeline, fsr_info);
}
static void
case MESA_SHADER_FRAGMENT: {
const bool raster_enabled =
!info->pRasterizationState->rasterizerDiscardEnable;
- populate_wm_prog_key(devinfo, sinfo->flags,
+ populate_wm_prog_key(pipeline, sinfo->flags,
pipeline->base.device->robust_buffer_access,
pipeline->subpass,
raster_enabled ? info->pMultisampleState : NULL,
+ vk_find_struct_const(info->pNext,
+ PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
&stages[stage].key.wm);
break;
}
goto fail;
}
+ /* This is rather ugly.
+ *
+ * Any variable annotated as interpolated by sample essentially disables
+ * coarse pixel shading. Unfortunately the CTS tests exercising this set
+ * the varying value in the previous stage using a constant. Our NIR
+ * infrastructure is clever enough to look up variables across stages and
+ * constant fold, removing the variable. So in order to comply with CTS
+ * we have to check variables here.
+ */
+ if (s == MESA_SHADER_FRAGMENT) {
+ nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
+ if (var->data.sample) {
+ stages[s].key.wm.coarse_pixel = false;
+ break;
+ }
+ }
+ }
+
stages[s].feedback.duration += os_time_get_nano() - stage_start;
}
pipeline->dynamic_state = default_dynamic_state;
- if (pCreateInfo->pDynamicState) {
- /* Remove all of the states that are marked as dynamic */
- uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
- for (uint32_t s = 0; s < count; s++) {
- states &= ~anv_cmd_dirty_bit_for_vk_dynamic_state(
- pCreateInfo->pDynamicState->pDynamicStates[s]);
- }
- }
+ states &= ~pipeline->dynamic_states;
struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
}
}
+ const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
+ vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
+ if (fsr_state) {
+ if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
+ dynamic->fragment_shading_rate = fsr_state->fragmentSize;
+ }
+
pipeline->dynamic_state_mask = states;
- /* For now that only state that can be either dynamic or baked in the
- * pipeline is the sample location & color blend.
+ /* Mark states that can either be dynamic or fully baked into the pipeline.
*/
pipeline->static_state_mask = states &
(ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
- ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
+ ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
+ ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
}
static void
assert(pCreateInfo->pRasterizationState);
+ pipeline->dynamic_states = 0;
+ if (pCreateInfo->pDynamicState) {
+ /* Remove all of the states that are marked as dynamic */
+ uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+ for (uint32_t s = 0; s < count; s++) {
+ pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
+ pCreateInfo->pDynamicState->pDynamicStates[s]);
+ }
+ }
copy_non_dynamic_state(pipeline, pCreateInfo);
+
pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
/* Previously we enabled depth clipping when !depthClampEnable.
ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE = 1 << 25, /* VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE = 1 << 26, /* VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR */
};
typedef uint32_t anv_cmd_dirty_mask_t;
ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | \
ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP | \
ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS | \
- ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE)
+ ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE | \
+ ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
static inline enum anv_cmd_dirty_bits
anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
return ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
return ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
+ case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
+ return ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE;
default:
assert(!"Unsupported dynamic state");
return 0;
VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
} sample_locations;
+ VkExtent2D fragment_shading_rate;
+
VkCullModeFlags cull_mode;
VkFrontFace front_face;
VkPrimitiveTopology primitive_topology;
struct anv_dynamic_state dynamic_state;
+ /* States declared dynamic at pipeline creation. */
+ anv_cmd_dirty_mask_t dynamic_states;
+
uint32_t topology;
struct anv_subpass * subpass;
struct anv_state blend_state;
+ struct anv_state cps_state;
+
uint32_t vb_used;
struct anv_pipeline_vertex_binding {
uint32_t stride;
assert(0 <= input_index);
- /* gl_Viewport and gl_Layer are stored in the VUE header */
- if (attr == VARYING_SLOT_VIEWPORT || attr == VARYING_SLOT_LAYER) {
+ /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
+ * VUE header
+ */
+ if (attr == VARYING_SLOT_VIEWPORT ||
+ attr == VARYING_SLOT_LAYER ||
+ attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
continue;
}
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
sm.SampleMask = sample_mask;
}
+
+ pipeline->cps_state = ANV_STATE_NULL;
+#if GFX_VER >= 11
+ if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) &&
+ pipeline->base.device->vk.enabled_extensions.KHR_fragment_shading_rate) {
+#if GFX_VER >= 12
+ struct anv_device *device = pipeline->base.device;
+ const uint32_t num_dwords =
+ GENX(CPS_STATE_length) * 4 * pipeline->dynamic_state.viewport.count;
+ pipeline->cps_state =
+ anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords, 32);
+#endif
+
+ genX(emit_shading_rate)(&pipeline->base.batch,
+ pipeline,
+ pipeline->cps_state,
+ &pipeline->dynamic_state);
+ }
+#endif
}
static const uint32_t vk_to_intel_logic_op[] = {
int varying = output->location;
uint8_t component_mask = output->component_mask;
- /* VARYING_SLOT_PSIZ contains three scalar fields packed together:
- * - VARYING_SLOT_LAYER in VARYING_SLOT_PSIZ.y
- * - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z
- * - VARYING_SLOT_PSIZ in VARYING_SLOT_PSIZ.w
+ /* VARYING_SLOT_PSIZ contains four scalar fields packed together:
+ * - VARYING_SLOT_PRIMITIVE_SHADING_RATE in VARYING_SLOT_PSIZ.x
+ * - VARYING_SLOT_LAYER in VARYING_SLOT_PSIZ.y
+ * - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z
+ * - VARYING_SLOT_PSIZ in VARYING_SLOT_PSIZ.w
*/
- if (varying == VARYING_SLOT_LAYER) {
+ if (varying == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
+ varying = VARYING_SLOT_PSIZ;
+ component_mask = 1 << 0; // SO_DECL_COMPMASK_X
+ } else if (varying == VARYING_SLOT_LAYER) {
varying = VARYING_SLOT_PSIZ;
component_mask = 1 << 1; // SO_DECL_COMPMASK_Y
} else if (varying == VARYING_SLOT_VIEWPORT) {
assert(!wm_prog_data->inner_coverage); /* Not available in SPIR-V */
if (!wm_prog_data->uses_sample_mask)
ps.InputCoverageMaskState = ICMS_NONE;
+ else if (wm_prog_data->per_coarse_pixel_dispatch)
+ ps.InputCoverageMaskState = ICMS_NORMAL;
else if (wm_prog_data->post_depth_coverage)
ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
else
#else
ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
#endif
+
+#if GFX_VER >= 11
+ ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
+ wm_prog_data->uses_depth_w_coefficients;
+ ps.PixelShaderIsPerCoarsePixel = wm_prog_data->per_coarse_pixel_dispatch;
+#endif
}
}
}
#endif
+#if GFX_VER >= 11
+/* Emits the coarse pixel shading state for the given pipeline.
+ *
+ * Coarse pixel shading is turned on only when the pipeline has a fragment
+ * shader whose program data requests per-coarse-pixel dispatch; the size
+ * programmed is dynamic_state->fragment_shading_rate.
+ *
+ * On GFX_VER == 11 a 3DSTATE_CPS packet is emitted into the batch. On
+ * GFX_VER == 12 one CPS_STATE entry per viewport
+ * (dynamic_state->viewport.count) is packed into cps_states, which is then
+ * referenced through 3DSTATE_CPS_POINTERS; cps_states therefore has to
+ * provide room for that many entries of GENX(CPS_STATE_length) * 4 bytes.
+ */
+void
+genX(emit_shading_rate)(struct anv_batch *batch,
+                        const struct anv_graphics_pipeline *pipeline,
+                        struct anv_state cps_states,
+                        struct anv_dynamic_state *dynamic_state)
+{
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+   const bool cps_enable =
+      wm_prog_data != NULL && wm_prog_data->per_coarse_pixel_dispatch;
+   const VkExtent2D *rate = &dynamic_state->fragment_shading_rate;
+
+#if GFX_VER == 11
+   anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
+      if (cps_enable) {
+         cps.CoarsePixelShadingMode = CPS_MODE_CONSTANT;
+         cps.MinCPSizeX = rate->width;
+         cps.MinCPSizeY = rate->height;
+      } else {
+         cps.CoarsePixelShadingMode = CPS_MODE_NONE;
+      }
+   }
+#elif GFX_VER == 12
+   /* Byte distance between two consecutive CPS_STATE entries. */
+   const uint32_t entry_stride = GENX(CPS_STATE_length) * 4;
+
+   for (uint32_t vp = 0; vp < dynamic_state->viewport.count; vp++) {
+      uint32_t *entry_dwords = cps_states.map + entry_stride * vp;
+      struct GENX(CPS_STATE) entry = {
+         .CoarsePixelShadingMode = cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE,
+      };
+
+      /* The size fields are left at zero when coarse pixels are off. */
+      if (cps_enable) {
+         entry.MinCPSizeX = rate->width;
+         entry.MinCPSizeY = rate->height;
+      }
+
+      GENX(CPS_STATE_pack)(NULL, entry_dwords, &entry);
+   }
+
+   anv_batch_emit(batch, GENX(3DSTATE_CPS_POINTERS), cps) {
+      cps.CoarsePixelShadingStateArrayPointer = cps_states.offset;
+   }
+#endif
+}
+#endif /* GFX_VER >= 11 */
+
static uint32_t
vk_to_intel_tex_filter(VkFilter filter, bool anisotropyEnable)
{
.SamplerDisable = false,
.TextureBorderColorMode = DX10OGL,
+#if GFX_VER >= 11
+ .CPSLODCompensationEnable = true,
+#endif
+
#if GFX_VER >= 8
.LODPreClampMode = CLAMP_MODE_OGL,
#else
}
}
+#if GFX_VER >= 11
+ if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) {
+ struct anv_state cps_states = ANV_STATE_NULL;
+
+#if GFX_VER >= 12
+ uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;
+ cps_states =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+ GENX(CPS_STATE_length) * 4 * count,
+ 32);
+#endif /* GFX_VER >= 12 */
+
+ genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, cps_states,
+ &cmd_buffer->state.gfx.dynamic);
+ }
+#endif /* GFX_VER >= 11 */
+
cmd_buffer->state.gfx.dirty = 0;
}