#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"
-#include "common/intel_guardband.h"
-#include "compiler/brw_prim.h"
#include "common/intel_genX_state.h"
#include "ds/intel_tracepoints.h"
}
#endif
-static void
-cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
-{
- const struct vk_dynamic_graphics_state *dyn =
- &cmd_buffer->vk.dynamic_graphics_state;
-
- if (!(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) &&
- !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) &&
- !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) &&
- !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT) &&
- !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) &&
- !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
- return;
-
- struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
-
- anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
- pipeline->gfx8.clip, clip) {
- /* Take dynamic primitive topology in to account with
- * 3DSTATE_CLIP::ViewportXYClipTestEnable
- */
- const VkPolygonMode dynamic_raster_mode =
- genX(raster_polygon_mode)(pipeline,
- dyn->rs.polygon_mode,
- dyn->ia.primitive_topology);
- const bool xy_clip_test_enable =
- (dynamic_raster_mode == VK_POLYGON_MODE_FILL);
-
- clip.APIMode = dyn->vp.depth_clip_negative_one_to_one ?
- APIMODE_OGL : APIMODE_D3D;
- clip.ViewportXYClipTestEnable = xy_clip_test_enable;
-
- ANV_SETUP_PROVOKING_VERTEX(clip, dyn->rs.provoking_vertex);
-
- /* TODO(mesh): Multiview. */
- if (anv_pipeline_is_primitive(pipeline)) {
- const struct brw_vue_prog_data *last =
- anv_pipeline_get_last_vue_prog_data(pipeline);
- if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
- clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
- dyn->vp.viewport_count - 1 : 0;
- }
- } else if (anv_pipeline_is_mesh(pipeline)) {
- const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
- if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
- clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
- dyn->vp.viewport_count - 1 : 0;
- }
- }
- }
-}
-
-static void
-cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
-{
- struct anv_instance *instance = cmd_buffer->device->physical->instance;
- struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
- const struct vk_dynamic_graphics_state *dyn =
- &cmd_buffer->vk.dynamic_graphics_state;
- uint32_t count = dyn->vp.viewport_count;
- const VkViewport *viewports = dyn->vp.viewports;
- struct anv_state sf_clip_state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
-
- const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;
-
- for (uint32_t i = 0; i < count; i++) {
- const VkViewport *vp = &viewports[i];
-
- /* The gfx7 state struct has just the matrix and guardband fields, the
- * gfx8 struct adds the min/max viewport fields. */
- struct GENX(SF_CLIP_VIEWPORT) sfv = {
- .ViewportMatrixElementm00 = vp->width / 2,
- .ViewportMatrixElementm11 = vp->height / 2,
- .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
- .ViewportMatrixElementm30 = vp->x + vp->width / 2,
- .ViewportMatrixElementm31 = vp->y + vp->height / 2,
- .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
- (vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
- .XMinClipGuardband = -1.0f,
- .XMaxClipGuardband = 1.0f,
- .YMinClipGuardband = -1.0f,
- .YMaxClipGuardband = 1.0f,
- .XMinViewPort = vp->x,
- .XMaxViewPort = vp->x + vp->width - 1,
- .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
- .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
- };
-
- /* Fix depth test misrenderings by lowering translated depth range */
- if (instance->lower_depth_range_rate != 1.0f)
- sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;
-
- const uint32_t fb_size_max = 1 << 14;
- uint32_t x_min = 0, x_max = fb_size_max;
- uint32_t y_min = 0, y_max = fb_size_max;
-
- /* If we have a valid renderArea, include that */
- if (gfx->render_area.extent.width > 0 &&
- gfx->render_area.extent.height > 0) {
- x_min = MAX2(x_min, gfx->render_area.offset.x);
- x_max = MIN2(x_max, gfx->render_area.offset.x +
- gfx->render_area.extent.width);
- y_min = MAX2(y_min, gfx->render_area.offset.y);
- y_max = MIN2(y_max, gfx->render_area.offset.y +
- gfx->render_area.extent.height);
- }
-
- /* The client is required to have enough scissors for whatever it sets
- * as ViewportIndex but it's possible that they've got more viewports
- * set from a previous command. Also, from the Vulkan 1.3.207:
- *
- * "The application must ensure (using scissor if necessary) that
- * all rendering is contained within the render area."
- *
- * If the client doesn't set a scissor, that basically means it
- * guarantees everything is in-bounds already. If we end up using a
- * guardband of [-1, 1] in that case, there shouldn't be much loss.
- * It's theoretically possible that they could do all their clipping
- * with clip planes but that'd be a bit odd.
- */
- if (i < dyn->vp.scissor_count) {
- const VkRect2D *scissor = &dyn->vp.scissors[i];
- x_min = MAX2(x_min, scissor->offset.x);
- x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
- y_min = MAX2(y_min, scissor->offset.y);
- y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
- }
-
- /* Only bother calculating the guardband if our known render area is
- * less than the maximum size. Otherwise, it will calculate [-1, 1]
- * anyway but possibly with precision loss.
- */
- if (x_min > 0 || x_max < fb_size_max ||
- y_min > 0 || y_max < fb_size_max) {
- intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
- sfv.ViewportMatrixElementm00,
- sfv.ViewportMatrixElementm11,
- sfv.ViewportMatrixElementm30,
- sfv.ViewportMatrixElementm31,
- &sfv.XMinClipGuardband,
- &sfv.XMaxClipGuardband,
- &sfv.YMinClipGuardband,
- &sfv.YMaxClipGuardband);
- }
-
- GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
- }
-
- anv_batch_emit(&cmd_buffer->batch,
- GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
- clip.SFClipViewportPointer = sf_clip_state.offset;
- }
-}
-
-static void
-cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
-{
- const struct vk_dynamic_graphics_state *dyn =
- &cmd_buffer->vk.dynamic_graphics_state;
- uint32_t count = dyn->vp.viewport_count;
- const VkViewport *viewports = dyn->vp.viewports;
- struct anv_state cc_state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
-
- for (uint32_t i = 0; i < count; i++) {
- const VkViewport *vp = &viewports[i];
-
- /* From the Vulkan spec:
- *
- * "It is valid for minDepth to be greater than or equal to
- * maxDepth."
- */
- float min_depth = MIN2(vp->minDepth, vp->maxDepth);
- float max_depth = MAX2(vp->minDepth, vp->maxDepth);
-
- struct GENX(CC_VIEWPORT) cc_viewport = {
- .MinimumDepth = dyn->rs.depth_clamp_enable ? min_depth : 0.0f,
- .MaximumDepth = dyn->rs.depth_clamp_enable ? max_depth : 1.0f,
- };
-
- GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
- }
-
- anv_batch_emit(&cmd_buffer->batch,
- GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
- cc.CCViewportPointer = cc_state.offset;
- }
-}
-
-static void
-cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
-{
- struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
- const struct vk_dynamic_graphics_state *dyn =
- &cmd_buffer->vk.dynamic_graphics_state;
- uint32_t count = dyn->vp.scissor_count;
- const VkRect2D *scissors = dyn->vp.scissors;
- const VkViewport *viewports = dyn->vp.viewports;
-
- /* Wa_1409725701:
- * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
- * stored as an array of up to 16 elements. The location of first
- * element of the array, as specified by Pointer to SCISSOR_RECT, should
- * be aligned to a 64-byte boundary.
- */
- uint32_t alignment = 64;
- struct anv_state scissor_state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, alignment);
-
- for (uint32_t i = 0; i < count; i++) {
- const VkRect2D *s = &scissors[i];
- const VkViewport *vp = &viewports[i];
-
- /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
- * ymax < ymin for empty clips. In case clip x, y, width height are all
- * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't
- * what we want. Just special case empty clips and produce a canonical
- * empty clip. */
- static const struct GENX(SCISSOR_RECT) empty_scissor = {
- .ScissorRectangleYMin = 1,
- .ScissorRectangleXMin = 1,
- .ScissorRectangleYMax = 0,
- .ScissorRectangleXMax = 0
- };
-
- const int max = 0xffff;
-
- uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
- uint32_t x_min = MAX2(s->offset.x, vp->x);
- int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
- MAX2(vp->y, vp->y + vp->height) - 1);
- int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
- vp->x + vp->width - 1);
-
- y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
- x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
-
- /* Do this math using int64_t so overflow gets clamped correctly. */
- if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
- y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
- x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
- y_max = CLAMP((uint64_t) y_max, 0,
- gfx->render_area.offset.y +
- gfx->render_area.extent.height - 1);
- x_max = CLAMP((uint64_t) x_max, 0,
- gfx->render_area.offset.x +
- gfx->render_area.extent.width - 1);
- }
-
- const struct GENX(SCISSOR_RECT) scissor = {
- .ScissorRectangleYMin = y_min,
- .ScissorRectangleXMin = x_min,
- .ScissorRectangleYMax = y_max,
- .ScissorRectangleXMax = x_max
- };
-
- if (s->extent.width <= 0 || s->extent.height <= 0) {
- GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8,
- &empty_scissor);
- } else {
- GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
- }
- }
-
- anv_batch_emit(&cmd_buffer->batch,
- GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
- ssp.ScissorRectPointer = scissor_state.offset;
- }
-}
-
ALWAYS_INLINE void
genX(batch_emit_pipe_control)(struct anv_batch *batch,
const struct intel_device_info *devinfo,
#endif
}
-static void
-genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
-{
-#if GFX_VERx10 >= 120
- /* Wa_16013994831 - Disable preemption during streamout, enable back
- * again if XFB not used by the current pipeline.
- *
- * Although this workaround applies to Gfx12+, we already disable object
- * level preemption for another reason in genX_state.c so we can skip this
- * for Gfx12.
- */
- if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
- return;
-
- if (cmd_buffer->state.gfx.pipeline->uses_xfb) {
- genX(cmd_buffer_set_preemption)(cmd_buffer, false);
- return;
- }
-
- if (!cmd_buffer->state.gfx.object_preemption)
- genX(cmd_buffer_set_preemption)(cmd_buffer, true);
-#endif
-}
-
-static void
-cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
-{
- const struct vk_dynamic_graphics_state *dyn =
- &cmd_buffer->vk.dynamic_graphics_state;
- struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
-
- genX(streamout_prologue)(cmd_buffer);
-
- anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
- pipeline->gfx8.streamout_state, so) {
- so.RenderingDisable = dyn->rs.rasterizer_discard_enable;
- so.RenderStreamSelect = dyn->rs.rasterization_stream;
-#if INTEL_NEEDS_WA_18022508906
- /* Wa_18022508906 :
- *
- * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
- *
- * SOL_INT::Render_Enable =
- * (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
- * (
- * (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
- * !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
- * !3DSTATE_STREAMOUT::API_Render_Disable &&
- * (
- * 3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
- * 3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
- * 3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
- * 3DSTATE_PS_EXTRA::PS_Valid ||
- * 3DSTATE_WM::Legacy Depth_Buffer_Clear ||
- * 3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
- * 3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
- * )
- * )
- *
- * If SOL_INT::Render_Enable is false, the SO stage will not forward any
- * topologies down the pipeline. Which is not what we want for occlusion
- * queries.
- *
- * Here we force rendering to get SOL_INT::Render_Enable when occlusion
- * queries are active.
- */
- if (!so.RenderingDisable && cmd_buffer->state.gfx.n_occlusion_queries > 0)
- so.ForceRendering = Force_on;
-#endif
-
- switch (dyn->rs.provoking_vertex) {
- case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
- so.ReorderMode = LEADING;
- break;
-
- case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
- so.ReorderMode = TRAILING;
- break;
-
- default:
- unreachable("Invalid provoking vertex mode");
- }
- }
-}
-
ALWAYS_INLINE static void
genX(emit_hs)(struct anv_cmd_buffer *cmd_buffer)
{
dirty & VK_SHADER_STAGE_ALL_GRAPHICS);
}
- cmd_buffer_emit_clip(cmd_buffer);
-
- if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
- ANV_CMD_DIRTY_XFB_ENABLE)) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
- cmd_buffer_emit_streamout(cmd_buffer);
-
- if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
- ANV_CMD_DIRTY_RENDER_TARGETS)) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) {
- cmd_buffer_emit_viewport(cmd_buffer);
- cmd_buffer_emit_depth_viewport(cmd_buffer);
- cmd_buffer_emit_scissor(cmd_buffer);
- }
-
- if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) {
- uint32_t topology;
- if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
- topology = _3DPRIM_PATCHLIST(dyn->ts.patch_control_points);
- else
- topology = genX(vk_to_intel_primitive_type)[dyn->ia.primitive_topology];
-
- cmd_buffer->state.gfx.primitive_topology = topology;
-
- anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
- vft.PrimitiveTopologyType = topology;
- }
- }
-
if (any_dynamic_state_dirty || cmd_buffer->state.gfx.dirty)
genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
}
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
+#include "common/intel_guardband.h"
+#include "compiler/brw_prim.h"
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
}
}
+/* Re-emit 3DSTATE_CLIP, merging the pipeline's pre-packed DWords
+ * (pipeline->gfx8.clip) with the dynamic state that feeds into it:
+ * primitive topology, depth-clip-control mode, viewport count, polygon
+ * mode and provoking vertex.  Early-outs when none of those inputs is
+ * dirty and the pipeline itself has not changed.
+ */
+static void
+cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
+{
+ const struct vk_dynamic_graphics_state *dyn =
+ &cmd_buffer->vk.dynamic_graphics_state;
+ struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+
+ if (!(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) &&
+ !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) &&
+ !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) &&
+ !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT) &&
+ !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) &&
+ !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
+ return;
+
+ anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
+ pipeline->gfx8.clip, clip) {
+ /* Take dynamic primitive topology in to account with
+ * 3DSTATE_CLIP::ViewportXYClipTestEnable
+ */
+ const VkPolygonMode dynamic_raster_mode =
+ genX(raster_polygon_mode)(pipeline,
+ dyn->rs.polygon_mode,
+ dyn->ia.primitive_topology);
+ /* XY clip testing is only enabled for filled polygons; point/line
+ * rasterization must not be XY-clipped here.
+ */
+ const bool xy_clip_test_enable =
+ (dynamic_raster_mode == VK_POLYGON_MODE_FILL);
+
+ /* VK_EXT_depth_clip_control: a [-1,1] (negative-one-to-one) depth
+ * range selects the OGL API mode, the default [0,1] range the D3D
+ * mode.
+ */
+ clip.APIMode = dyn->vp.depth_clip_negative_one_to_one ?
+ APIMODE_OGL : APIMODE_D3D;
+ clip.ViewportXYClipTestEnable = xy_clip_test_enable;
+
+ ANV_SETUP_PROVOKING_VERTEX(clip, dyn->rs.provoking_vertex);
+
+ /* TODO(mesh): Multiview. */
+ /* Only raise MaximumVPIndex when the last pre-rasterization stage
+ * actually writes a viewport index (VARYING_SLOT_VIEWPORT);
+ * otherwise the value packed into pipeline->gfx8.clip is kept.
+ */
+ if (anv_pipeline_is_primitive(pipeline)) {
+ const struct brw_vue_prog_data *last =
+ anv_pipeline_get_last_vue_prog_data(pipeline);
+ if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
+ clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
+ dyn->vp.viewport_count - 1 : 0;
+ }
+ } else if (anv_pipeline_is_mesh(pipeline)) {
+ const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
+ if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
+ clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ?
+ dyn->vp.viewport_count - 1 : 0;
+ }
+ }
+ }
+}
+
+/* Toggle object-level preemption around transform feedback for
+ * Wa_16013994831: preemption is disabled while the bound pipeline uses
+ * XFB, and re-enabled once it does not.  Compiles to a no-op below
+ * Gfx12.x and early-outs on parts that do not need the workaround.
+ */
+static void
+genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
+{
+#if GFX_VERx10 >= 120
+ /* Wa_16013994831 - Disable preemption during streamout, enable back
+ * again if XFB not used by the current pipeline.
+ *
+ * Although this workaround applies to Gfx12+, we already disable object
+ * level preemption for another reason in genX_state.c so we can skip this
+ * for Gfx12.
+ */
+ if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
+ return;
+
+ if (cmd_buffer->state.gfx.pipeline->uses_xfb) {
+ genX(cmd_buffer_set_preemption)(cmd_buffer, false);
+ return;
+ }
+
+ /* Pipeline without XFB: restore preemption only if it is currently off,
+ * to avoid emitting a redundant state change.
+ */
+ if (!cmd_buffer->state.gfx.object_preemption)
+ genX(cmd_buffer_set_preemption)(cmd_buffer, true);
+#endif
+}
+
+/* Re-emit 3DSTATE_STREAMOUT, merging the pipeline's pre-packed DWords
+ * (pipeline->gfx8.streamout_state) with the dynamic rasterizer-discard,
+ * rasterization-stream and provoking-vertex state.  Also runs the
+ * streamout preemption prologue (Wa_16013994831) first.
+ */
+static void
+cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
+{
+ const struct vk_dynamic_graphics_state *dyn =
+ &cmd_buffer->vk.dynamic_graphics_state;
+ struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+
+ genX(streamout_prologue)(cmd_buffer);
+
+ anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
+ pipeline->gfx8.streamout_state, so) {
+ so.RenderingDisable = dyn->rs.rasterizer_discard_enable;
+ so.RenderStreamSelect = dyn->rs.rasterization_stream;
+#if INTEL_NEEDS_WA_18022508906
+ /* Wa_18022508906 :
+ *
+ * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
+ *
+ * SOL_INT::Render_Enable =
+ * (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
+ * (
+ * (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
+ * !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
+ * !3DSTATE_STREAMOUT::API_Render_Disable &&
+ * (
+ * 3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
+ * 3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
+ * 3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
+ * 3DSTATE_PS_EXTRA::PS_Valid ||
+ * 3DSTATE_WM::Legacy Depth_Buffer_Clear ||
+ * 3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
+ * 3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
+ * )
+ * )
+ *
+ * If SOL_INT::Render_Enable is false, the SO stage will not forward any
+ * topologies down the pipeline. Which is not what we want for occlusion
+ * queries.
+ *
+ * Here we force rendering to get SOL_INT::Render_Enable when occlusion
+ * queries are active.
+ */
+ if (!so.RenderingDisable && cmd_buffer->state.gfx.n_occlusion_queries > 0)
+ so.ForceRendering = Force_on;
+#endif
+
+ /* Map the Vulkan provoking-vertex mode onto the SOL reorder mode so
+ * that streamed-out vertex ordering matches the provoking vertex.
+ */
+ switch (dyn->rs.provoking_vertex) {
+ case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
+ so.ReorderMode = LEADING;
+ break;
+
+ case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
+ so.ReorderMode = TRAILING;
+ break;
+
+ default:
+ unreachable("Invalid provoking vertex mode");
+ }
+ }
+}
+
+/* Build the SF_CLIP_VIEWPORT array (one 64-byte entry per viewport) in
+ * dynamic state memory and point 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP
+ * at it.  Each entry holds the viewport transform matrix, the XY clip
+ * guardband and the min/max viewport extents.
+ */
+static void
+cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_instance *instance = cmd_buffer->device->physical->instance;
+ struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
+ const struct vk_dynamic_graphics_state *dyn =
+ &cmd_buffer->vk.dynamic_graphics_state;
+ uint32_t count = dyn->vp.viewport_count;
+ const VkViewport *viewports = dyn->vp.viewports;
+ struct anv_state sf_clip_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
+
+ /* With VK_EXT_depth_clip_control's [-1,1] depth range, halving the
+ * depth scale/offset folds z = [-1,1] into [minDepth, maxDepth]:
+ * z * (max-min)/2 + (min+max)/2.
+ */
+ const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;
+
+ for (uint32_t i = 0; i < count; i++) {
+ const VkViewport *vp = &viewports[i];
+
+ /* The gfx7 state struct has just the matrix and guardband fields, the
+ * gfx8 struct adds the min/max viewport fields. */
+ struct GENX(SF_CLIP_VIEWPORT) sfv = {
+ .ViewportMatrixElementm00 = vp->width / 2,
+ .ViewportMatrixElementm11 = vp->height / 2,
+ .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
+ .ViewportMatrixElementm30 = vp->x + vp->width / 2,
+ .ViewportMatrixElementm31 = vp->y + vp->height / 2,
+ .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
+ (vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
+ .XMinClipGuardband = -1.0f,
+ .XMaxClipGuardband = 1.0f,
+ .YMinClipGuardband = -1.0f,
+ .YMaxClipGuardband = 1.0f,
+ .XMinViewPort = vp->x,
+ .XMaxViewPort = vp->x + vp->width - 1,
+ /* Vulkan allows negative-height (y-flipped) viewports, in which
+ * case vp->y + vp->height < vp->y; normalize so YMin <= YMax.
+ */
+ .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
+ .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
+ };
+
+ /* Fix depth test misrenderings by lowering translated depth range */
+ if (instance->lower_depth_range_rate != 1.0f)
+ sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;
+
+ /* 1 << 14 = 16384: used as the "no information" render-area bound
+ * below; anything tighter enables a real guardband computation.
+ */
+ const uint32_t fb_size_max = 1 << 14;
+ uint32_t x_min = 0, x_max = fb_size_max;
+ uint32_t y_min = 0, y_max = fb_size_max;
+
+ /* If we have a valid renderArea, include that */
+ if (gfx->render_area.extent.width > 0 &&
+ gfx->render_area.extent.height > 0) {
+ x_min = MAX2(x_min, gfx->render_area.offset.x);
+ x_max = MIN2(x_max, gfx->render_area.offset.x +
+ gfx->render_area.extent.width);
+ y_min = MAX2(y_min, gfx->render_area.offset.y);
+ y_max = MIN2(y_max, gfx->render_area.offset.y +
+ gfx->render_area.extent.height);
+ }
+
+ /* The client is required to have enough scissors for whatever it sets
+ * as ViewportIndex but it's possible that they've got more viewports
+ * set from a previous command. Also, from the Vulkan 1.3.207:
+ *
+ * "The application must ensure (using scissor if necessary) that
+ * all rendering is contained within the render area."
+ *
+ * If the client doesn't set a scissor, that basically means it
+ * guarantees everything is in-bounds already. If we end up using a
+ * guardband of [-1, 1] in that case, there shouldn't be much loss.
+ * It's theoretically possible that they could do all their clipping
+ * with clip planes but that'd be a bit odd.
+ */
+ if (i < dyn->vp.scissor_count) {
+ const VkRect2D *scissor = &dyn->vp.scissors[i];
+ x_min = MAX2(x_min, scissor->offset.x);
+ x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
+ y_min = MAX2(y_min, scissor->offset.y);
+ y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
+ }
+
+ /* Only bother calculating the guardband if our known render area is
+ * less than the maximum size. Otherwise, it will calculate [-1, 1]
+ * anyway but possibly with precision loss.
+ */
+ if (x_min > 0 || x_max < fb_size_max ||
+ y_min > 0 || y_max < fb_size_max) {
+ intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
+ sfv.ViewportMatrixElementm00,
+ sfv.ViewportMatrixElementm11,
+ sfv.ViewportMatrixElementm30,
+ sfv.ViewportMatrixElementm31,
+ &sfv.XMinClipGuardband,
+ &sfv.XMaxClipGuardband,
+ &sfv.YMinClipGuardband,
+ &sfv.YMaxClipGuardband);
+ }
+
+ /* Each SF_CLIP_VIEWPORT entry occupies 64 bytes (matches the
+ * allocation stride above).
+ */
+ GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
+ }
+
+ anv_batch_emit(&cmd_buffer->batch,
+ GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
+ clip.SFClipViewportPointer = sf_clip_state.offset;
+ }
+}
+
+/* Build the CC_VIEWPORT array (one 8-byte entry per viewport) holding
+ * the per-viewport depth clamp range, then point
+ * 3DSTATE_VIEWPORT_STATE_POINTERS_CC at it.
+ */
+static void
+cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
+{
+ const struct vk_dynamic_graphics_state *dyn =
+ &cmd_buffer->vk.dynamic_graphics_state;
+ uint32_t count = dyn->vp.viewport_count;
+ const VkViewport *viewports = dyn->vp.viewports;
+ struct anv_state cc_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
+
+ for (uint32_t i = 0; i < count; i++) {
+ const VkViewport *vp = &viewports[i];
+
+ /* From the Vulkan spec:
+ *
+ * "It is valid for minDepth to be greater than or equal to
+ * maxDepth."
+ */
+ float min_depth = MIN2(vp->minDepth, vp->maxDepth);
+ float max_depth = MAX2(vp->minDepth, vp->maxDepth);
+
+ /* When depth clamping is disabled, program the full [0,1] range so
+ * the clamp in hardware has no effect.
+ */
+ struct GENX(CC_VIEWPORT) cc_viewport = {
+ .MinimumDepth = dyn->rs.depth_clamp_enable ? min_depth : 0.0f,
+ .MaximumDepth = dyn->rs.depth_clamp_enable ? max_depth : 1.0f,
+ };
+
+ GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
+ }
+
+ anv_batch_emit(&cmd_buffer->batch,
+ GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
+ cc.CCViewportPointer = cc_state.offset;
+ }
+}
+
+/* Build the SCISSOR_RECT array (one 8-byte entry per scissor), clamping
+ * each rectangle against its viewport, the hardware coordinate limits
+ * and (for primary command buffers) the render area, then point
+ * 3DSTATE_SCISSOR_STATE_POINTERS at it.
+ */
+static void
+cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
+ const struct vk_dynamic_graphics_state *dyn =
+ &cmd_buffer->vk.dynamic_graphics_state;
+ uint32_t count = dyn->vp.scissor_count;
+ const VkRect2D *scissors = dyn->vp.scissors;
+ const VkViewport *viewports = dyn->vp.viewports;
+
+ /* Wa_1409725701:
+ * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
+ * stored as an array of up to 16 elements. The location of first
+ * element of the array, as specified by Pointer to SCISSOR_RECT, should
+ * be aligned to a 64-byte boundary."
+ */
+ uint32_t alignment = 64;
+ struct anv_state scissor_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, alignment);
+
+ for (uint32_t i = 0; i < count; i++) {
+ const VkRect2D *s = &scissors[i];
+ const VkViewport *vp = &viewports[i];
+
+ /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
+ * ymax < ymin for empty clips. In case clip x, y, width height are all
+ * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't
+ * what we want. Just special case empty clips and produce a canonical
+ * empty clip. */
+ static const struct GENX(SCISSOR_RECT) empty_scissor = {
+ .ScissorRectangleYMin = 1,
+ .ScissorRectangleXMin = 1,
+ .ScissorRectangleYMax = 0,
+ .ScissorRectangleXMax = 0
+ };
+
+ const int max = 0xffff;
+
+ /* Intersect the scissor with the viewport bounds; MIN2/MAX2 on the
+ * viewport Y handles negative-height (flipped) viewports.
+ */
+ uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
+ uint32_t x_min = MAX2(s->offset.x, vp->x);
+ int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
+ MAX2(vp->y, vp->y + vp->height) - 1);
+ int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
+ vp->x + vp->width - 1);
+
+ /* INT16_MAX >> 1 == 16383, the largest representable scissor
+ * coordinate; also keeps the values non-negative.
+ */
+ y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
+ x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
+
+ /* Do this math using int64_t so overflow gets clamped correctly. */
+ if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+ y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
+ x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
+ y_max = CLAMP((uint64_t) y_max, 0,
+ gfx->render_area.offset.y +
+ gfx->render_area.extent.height - 1);
+ x_max = CLAMP((uint64_t) x_max, 0,
+ gfx->render_area.offset.x +
+ gfx->render_area.extent.width - 1);
+ }
+
+ const struct GENX(SCISSOR_RECT) scissor = {
+ .ScissorRectangleYMin = y_min,
+ .ScissorRectangleXMin = x_min,
+ .ScissorRectangleYMax = y_max,
+ .ScissorRectangleXMax = x_max
+ };
+
+ /* Zero-sized scissors get the canonical empty rectangle declared
+ * above instead of the computed (possibly non-empty) clamp result.
+ */
+ if (s->extent.width <= 0 || s->extent.height <= 0) {
+ GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8,
+ &empty_scissor);
+ } else {
+ GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
+ }
+ }
+
+ anv_batch_emit(&cmd_buffer->batch,
+ GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
+ ssp.ScissorRectPointer = scissor_state.offset;
+ }
+}
+
void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
+ cmd_buffer_emit_clip(cmd_buffer);
+
+ if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
+ ANV_CMD_DIRTY_XFB_ENABLE)) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
+ cmd_buffer_emit_streamout(cmd_buffer);
+
+ if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
+ ANV_CMD_DIRTY_RENDER_TARGETS)) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) {
+ cmd_buffer_emit_viewport(cmd_buffer);
+ cmd_buffer_emit_depth_viewport(cmd_buffer);
+ cmd_buffer_emit_scissor(cmd_buffer);
+ }
+
+ if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) {
+ uint32_t topology;
+ if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
+ topology = _3DPRIM_PATCHLIST(dyn->ts.patch_control_points);
+ else
+ topology = genX(vk_to_intel_primitive_type)[dyn->ia.primitive_topology];
+
+ cmd_buffer->state.gfx.primitive_topology = topology;
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
+ vft.PrimitiveTopologyType = topology;
+ }
+ }
+
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI)) {
const uint32_t ve_count =