From 168b13364f052be00d2015b6851d6c84e61064e9 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 27 Apr 2022 23:44:40 +0300 Subject: [PATCH] anv: rework sample location MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit On Gfx7 we can only give the sample location for a given multisample number. This means everytime the multisampling value changes, we have to re-emit the locations. It's fine because it's also where (3DSTATE_MULTISAMPLE) the number of samples is stored. On Gfx8+ though, 3DSTATE_MULTISAMPLE only holds the number of samples and all the sample locations for all number of samples are located in 3DSTATE_SAMPLE_PATTERN. So to be more effecient there, we need to track the locations for all sample numbers and compare new values with the relevant sample count when touching the dynamic state. Signed-off-by: Lionel Landwerlin Reviewed-by: Tapani Pälli Part-of: --- src/intel/vulkan/anv_cmd_buffer.c | 58 ++++++++++++++---- src/intel/vulkan/anv_genX.h | 8 ++- src/intel/vulkan/anv_pipeline.c | 23 ++++++-- src/intel/vulkan/anv_private.h | 24 +++++++- src/intel/vulkan/genX_cmd_buffer.c | 12 ++++ src/intel/vulkan/genX_pipeline.c | 18 ------ src/intel/vulkan/genX_state.c | 117 +++++++++++++------------------------ src/intel/vulkan/gfx7_cmd_buffer.c | 6 +- src/intel/vulkan/gfx8_cmd_buffer.c | 8 +-- 9 files changed, 153 insertions(+), 121 deletions(-) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index ea62f8b..e549cdc 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -107,6 +107,23 @@ const struct anv_dynamic_state default_dynamic_state = { .logic_op = 0, }; +void +anv_dynamic_state_init(struct anv_dynamic_state *state) +{ + *state = default_dynamic_state; + +#define INIT_LOCATIONS(idx) \ + memcpy(state->sample_locations.locations_##idx, \ + intel_sample_positions_##idx##x, \ + sizeof(state->sample_locations.locations_##idx)) + INIT_LOCATIONS(1); + INIT_LOCATIONS(2); + INIT_LOCATIONS(4); + INIT_LOCATIONS(8); + INIT_LOCATIONS(16); +#undef INIT_LOCATIONS +} + /** * Copy the dynamic state from src to dest based on the copy_mask. * @@ -199,10 +216,26 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest, ANV_CMP_COPY(logic_op, ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP); if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { - typed_memcpy(dest->sample_locations.locations, - src->sample_locations.locations, - ARRAY_SIZE(src->sample_locations.locations)); - changed |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; +#define ANV_CMP_COPY_LOCATIONS(idx) \ + if (memcmp(dest->sample_locations.locations_##idx, \ + src->sample_locations.locations_##idx, \ + sizeof(src->sample_locations.locations_##idx))) { \ + typed_memcpy(dest->sample_locations.locations_##idx, \ + src->sample_locations.locations_##idx, \ + ARRAY_SIZE(src->sample_locations.locations_##idx)); \ + changed |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; \ + } + + switch (src->sample_locations.pipeline_samples) { + case 1: ANV_CMP_COPY_LOCATIONS(1); break; + case 2: ANV_CMP_COPY_LOCATIONS(2); break; + case 4: ANV_CMP_COPY_LOCATIONS(4); break; + case 8: ANV_CMP_COPY_LOCATIONS(8); break; + case 16: ANV_CMP_COPY_LOCATIONS(16); break; + default: unreachable("invalid sample count"); + } + +#undef ANV_CMP_COPY_LOCATIONS } ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE); @@ -226,7 +259,7 @@ anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer) state->current_pipeline = UINT32_MAX; state->restart_index = UINT32_MAX; - state->gfx.dynamic = default_dynamic_state; + anv_dynamic_state_init(&state->gfx.dynamic); } static void @@ -855,11 +888,16 @@ void anv_CmdSetSampleLocationsEXT( struct anv_dynamic_state *dyn_state = &cmd_buffer->state.gfx.dynamic; uint32_t samples = pSampleLocationsInfo->sampleLocationsPerPixel; - - typed_memcpy(dyn_state->sample_locations.locations, - pSampleLocationsInfo->pSampleLocations, samples); - - cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; + struct intel_sample_position *positions = + anv_dynamic_state_get_sample_locations(dyn_state, samples); + for (uint32_t i = 0; i < samples; i++) { + if (positions[i].x != pSampleLocationsInfo->pSampleLocations[i].x || + positions[i].y != pSampleLocationsInfo->pSampleLocations[i].y) { + positions[i].x = pSampleLocationsInfo->pSampleLocations[i].x; + positions[i].y = pSampleLocationsInfo->pSampleLocations[i].y; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; + } + } } void anv_CmdSetLineStippleEXT( diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 1a65879..95b9eda 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -36,6 +36,8 @@ #error This file is included by means other than anv_private.h #endif +struct intel_sample_positions; + extern const uint32_t genX(vk_to_intel_cullmode)[]; extern const uint32_t genX(vk_to_intel_front_face)[]; @@ -129,10 +131,10 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, enum intel_urb_deref_block_size *deref_block_size); void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples, - const VkSampleLocationEXT *locations); + const struct intel_sample_position *positions); -void genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples, - const VkSampleLocationEXT *locations); +void genX(emit_sample_pattern)(struct anv_batch *batch, + const struct anv_dynamic_state *dynamic_state); void genX(emit_shading_rate)(struct anv_batch *batch, const struct anv_graphics_pipeline *pipeline, diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index d8f3f81..e6eaf5a 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -2110,7 +2110,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline, { anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; - pipeline->dynamic_state = default_dynamic_state; + anv_dynamic_state_init(&pipeline->dynamic_state); states &= ~pipeline->dynamic_states; @@ -2314,22 +2314,33 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline, const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ? vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL; - uint32_t samples = ms_info ? ms_info->rasterizationSamples : 1; + uint32_t samples = MAX2(1, ms_info ? ms_info->rasterizationSamples : 1); + struct intel_sample_position *locations; + switch (samples) { + case 1: locations = dynamic->sample_locations.locations_1; break; + case 2: locations = dynamic->sample_locations.locations_2; break; + case 4: locations = dynamic->sample_locations.locations_4; break; + case 8: locations = dynamic->sample_locations.locations_8; break; + case 16: locations = dynamic->sample_locations.locations_16; break; + default: unreachable("invalid sample count"); + } + if (sl_info) { const VkSampleLocationEXT *positions = sl_info->sampleLocationsInfo.pSampleLocations; for (uint32_t i = 0; i < samples; i++) { - dynamic->sample_locations.locations[i].x = positions[i].x; - dynamic->sample_locations.locations[i].y = positions[i].y; + locations[i].x = positions[i].x; + locations[i].y = positions[i].y; } } else { const struct intel_sample_position *positions = intel_get_sample_positions(samples); for (uint32_t i = 0; i < samples; i++) { - dynamic->sample_locations.locations[i].x = positions[i].x; - dynamic->sample_locations.locations[i].y = positions[i].y; + locations[i].x = positions[i].x; + locations[i].y = positions[i].y; } } + dynamic->sample_locations.pipeline_samples = samples; } if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d8f1982..9b01538 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -49,6 +49,7 @@ #include "common/intel_gem.h" #include "common/intel_l3_config.h" #include "common/intel_measure.h" +#include "common/intel_sample_positions.h" #include "dev/intel_device_info.h" #include "blorp/blorp.h" #include "compiler/brw_compiler.h" @@ -2707,7 +2708,13 @@ struct anv_dynamic_state { } line_stipple; struct { - VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS]; + struct intel_sample_position locations_1[1]; + struct intel_sample_position locations_2[2]; + struct intel_sample_position locations_4[4]; + struct intel_sample_position locations_8[8]; + struct intel_sample_position locations_16[16]; + /* Only valid on the pipeline dynamic state */ + unsigned pipeline_samples; } sample_locations; struct { @@ -2736,10 +2743,25 @@ struct anv_dynamic_state { extern const struct anv_dynamic_state default_dynamic_state; +void anv_dynamic_state_init(struct anv_dynamic_state *state); uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest, const struct anv_dynamic_state *src, uint32_t copy_mask); +static inline struct intel_sample_position * +anv_dynamic_state_get_sample_locations(struct anv_dynamic_state *state, + unsigned samples) +{ + switch (samples) { + case 1: return state->sample_locations.locations_1; break; + case 2: return state->sample_locations.locations_2; break; + case 4: return state->sample_locations.locations_4; break; + case 8: return state->sample_locations.locations_8; break; + case 16: return state->sample_locations.locations_16; break; + default: unreachable("invalid sample count"); + } +} + struct anv_surface_state { struct anv_state state; /** Address of the surface referred to by this state diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index f1fbfe0..cc34261 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1788,6 +1788,18 @@ genX(BeginCommandBuffer)( cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; } +#if GFX_VER >= 8 + /* Emit the sample pattern at the beginning of the batch because the + * default locations emitted at the device initialization might have been + * changed by a previous command buffer. + * + * Do not change that when we're continuing a previous renderpass. + */ + if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations && + !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) + genX(emit_sample_pattern)(&cmd_buffer->batch, NULL); +#endif + #if GFX_VERx10 >= 75 if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info = diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 0d774cf..8685b98 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -929,24 +929,6 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline, NULL); #endif - /* If EXT_sample_locations is enabled and the sample locations are not - * dynamic, then we need to emit those position in the pipeline batch. On - * Gfx8+ this is part of 3DSTATE_SAMPLE_PATTERN, prior to that this is in - * 3DSTATE_MULTISAMPLE. - */ - if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations && - !(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) { -#if GFX_VER >= 8 - genX(emit_sample_pattern)(&pipeline->base.batch, - pipeline->rasterization_samples, - pipeline->dynamic_state.sample_locations.locations); -#else - genX(emit_multisample)(&pipeline->base.batch, - pipeline->rasterization_samples, - pipeline->dynamic_state.sample_locations.locations); -#endif - } - /* From the Vulkan 1.0 spec: * If pSampleMask is NULL, it is treated as if the mask has all bits * enabled, i.e. no coverage is removed from fragments. diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 25fa9db..ab9d5f1 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -256,7 +256,7 @@ init_render_queue_state(struct anv_queue *queue) #if GFX_VER >= 8 anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), ck); - genX(emit_sample_pattern)(&batch, 0, NULL); + genX(emit_sample_pattern)(&batch, NULL); /* The BDW+ docs describe how to use the 3DSTATE_WM_HZ_OP instruction in the * section titled, "Optimized Depth Buffer Clear and/or Stencil Buffer @@ -646,7 +646,7 @@ genX(emit_l3_config)(struct anv_batch *batch, void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples, - const VkSampleLocationEXT *locations) + const struct intel_sample_position *positions) { anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) { ms.NumberofMultisamples = __builtin_ffs(samples) - 1; @@ -660,41 +660,21 @@ genX(emit_multisample)(struct anv_batch *batch, uint32_t samples, */ ms.PixelPositionOffsetEnable = false; #else - - if (locations) { - switch (samples) { - case 1: - INTEL_SAMPLE_POS_1X_ARRAY(ms.Sample, locations); - break; - case 2: - INTEL_SAMPLE_POS_2X_ARRAY(ms.Sample, locations); - break; - case 4: - INTEL_SAMPLE_POS_4X_ARRAY(ms.Sample, locations); - break; - case 8: - INTEL_SAMPLE_POS_8X_ARRAY(ms.Sample, locations); - break; - default: - break; - } - } else { - switch (samples) { - case 1: - INTEL_SAMPLE_POS_1X(ms.Sample); - break; - case 2: - INTEL_SAMPLE_POS_2X(ms.Sample); - break; - case 4: - INTEL_SAMPLE_POS_4X(ms.Sample); - break; - case 8: - INTEL_SAMPLE_POS_8X(ms.Sample); - break; - default: + switch (samples) { + case 1: + INTEL_SAMPLE_POS_1X_ARRAY(ms.Sample, positions); + break; + case 2: + INTEL_SAMPLE_POS_2X_ARRAY(ms.Sample, positions); + break; + case 4: + INTEL_SAMPLE_POS_4X_ARRAY(ms.Sample, positions); + break; + case 8: + INTEL_SAMPLE_POS_8X_ARRAY(ms.Sample, positions); + break; + default: break; - } } #endif } @@ -702,53 +682,38 @@ genX(emit_multisample)(struct anv_batch *batch, uint32_t samples, #if GFX_VER >= 8 void -genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples, - const VkSampleLocationEXT *locations) +genX(emit_sample_pattern)(struct anv_batch *batch, + const struct anv_dynamic_state *d) { /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and * VkPhysicalDeviceFeatures::standardSampleLocations. */ anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) { - if (locations) { - /* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says: - * - * "When programming the sample offsets (for NUMSAMPLES_4 or _8 - * and MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 - * (or 7 for 8X, or 15 for 16X) must have monotonically increasing - * distance from the pixel center. This is required to get the - * correct centroid computation in the device." - * - * However, the Vulkan spec seems to require that the the samples - * occur in the order provided through the API. The standard sample - * patterns have the above property that they have monotonically - * increasing distances from the center but client-provided ones do - * not. As long as this only affects centroid calculations as the - * docs say, we should be ok because OpenGL and Vulkan only require - * that the centroid be some lit sample and that it's the same for - * all samples in a pixel; they have no requirement that it be the - * one closest to center. - */ - switch (samples) { - case 1: - INTEL_SAMPLE_POS_1X_ARRAY(sp._1xSample, locations); - break; - case 2: - INTEL_SAMPLE_POS_2X_ARRAY(sp._2xSample, locations); - break; - case 4: - INTEL_SAMPLE_POS_4X_ARRAY(sp._4xSample, locations); - break; - case 8: - INTEL_SAMPLE_POS_8X_ARRAY(sp._8xSample, locations); - break; + /* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says: + * + * "When programming the sample offsets (for NUMSAMPLES_4 or _8 + * and MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 + * (or 7 for 8X, or 15 for 16X) must have monotonically increasing + * distance from the pixel center. This is required to get the + * correct centroid computation in the device." + * + * However, the Vulkan spec seems to require that the the samples occur + * in the order provided through the API. The standard sample patterns + * have the above property that they have monotonically increasing + * distances from the center but client-provided ones do not. As long as + * this only affects centroid calculations as the docs say, we should be + * ok because OpenGL and Vulkan only require that the centroid be some + * lit sample and that it's the same for all samples in a pixel; they + * have no requirement that it be the one closest to center. + */ + if (d) { + INTEL_SAMPLE_POS_1X_ARRAY(sp._1xSample, d->sample_locations.locations_1); + INTEL_SAMPLE_POS_2X_ARRAY(sp._2xSample, d->sample_locations.locations_2); + INTEL_SAMPLE_POS_4X_ARRAY(sp._4xSample, d->sample_locations.locations_4); + INTEL_SAMPLE_POS_8X_ARRAY(sp._8xSample, d->sample_locations.locations_8); #if GFX_VER >= 9 - case 16: - INTEL_SAMPLE_POS_16X_ARRAY(sp._16xSample, locations); - break; + INTEL_SAMPLE_POS_16X_ARRAY(sp._16xSample, d->sample_locations.locations_16); #endif - default: - break; - } } else { INTEL_SAMPLE_POS_1X(sp._1xSample); INTEL_SAMPLE_POS_2X(sp._2xSample); diff --git a/src/intel/vulkan/gfx7_cmd_buffer.c b/src/intel/vulkan/gfx7_cmd_buffer.c index 3d0b142..6f9d619 100644 --- a/src/intel/vulkan/gfx7_cmd_buffer.c +++ b/src/intel/vulkan/gfx7_cmd_buffer.c @@ -301,10 +301,12 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm); } - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) { genX(emit_multisample)(&cmd_buffer->batch, pipeline->rasterization_samples, - d->sample_locations.locations); + anv_dynamic_state_get_sample_locations(d, + pipeline->rasterization_samples)); } if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE | diff --git a/src/intel/vulkan/gfx8_cmd_buffer.c b/src/intel/vulkan/gfx8_cmd_buffer.c index 68ca350..4f5c118 100644 --- a/src/intel/vulkan/gfx8_cmd_buffer.c +++ b/src/intel/vulkan/gfx8_cmd_buffer.c @@ -622,11 +622,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) } #endif - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { - genX(emit_sample_pattern)(&cmd_buffer->batch, - pipeline->rasterization_samples, - d->sample_locations.locations); - } + if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations && + cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) + genX(emit_sample_pattern)(&cmd_buffer->batch, d); if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE | -- 2.7.4