From efff73422049d738b4adfaa34d5fde78a2805d5e Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 14 Jul 2020 10:38:09 -0400 Subject: [PATCH] turnip: multiViewport and VK_EXT_shader_viewport_index_layer Passes at least: dEQP-VK.dynamic_state.vp_state.viewport_array dEQP-VK.draw.shader_viewport_index.* dEQP-VK.draw.shader_layer.* Signed-off-by: Jonathan Marek Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.c | 16 +-- src/freedreno/vulkan/tu_device.c | 6 +- src/freedreno/vulkan/tu_extensions.py | 1 + src/freedreno/vulkan/tu_pipeline.c | 193 +++++++++++++++++++--------------- src/freedreno/vulkan/tu_private.h | 14 ++- src/freedreno/vulkan/tu_shader.c | 1 + 6 files changed, 136 insertions(+), 95 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index f77f849..7a67f5d 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -2025,11 +2025,13 @@ tu_CmdSetViewport(VkCommandBuffer commandBuffer, const VkViewport *pViewports) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 18); + struct tu_cs cs; - assert(firstViewport == 0 && viewportCount == 1); + memcpy(&cmd->state.viewport[firstViewport], pViewports, viewportCount * sizeof(*pViewports)); + cmd->state.max_viewport = MAX2(cmd->state.max_viewport, firstViewport + viewportCount); - tu6_emit_viewport(&cs, pViewports); + cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * cmd->state.max_viewport); + tu6_emit_viewport(&cs, cmd->state.viewport, cmd->state.max_viewport); } void @@ -2039,11 +2041,13 @@ tu_CmdSetScissor(VkCommandBuffer commandBuffer, const VkRect2D *pScissors) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_SCISSOR, 3); + struct tu_cs cs; - assert(firstScissor == 0 && scissorCount == 1); + memcpy(&cmd->state.scissor[firstScissor], pScissors, scissorCount * sizeof(*pScissors)); + cmd->state.max_scissor = MAX2(cmd->state.max_scissor, firstScissor + scissorCount); - tu6_emit_scissor(&cs, pScissors); + cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_SCISSOR, 1 + 2 * cmd->state.max_scissor); + tu6_emit_scissor(&cs, cmd->state.scissor, cmd->state.max_scissor); } void diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index ac5c95c..11390fa 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -389,7 +389,7 @@ tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, .wideLines = false, .largePoints = true, .alphaToOne = true, - .multiViewport = false, + .multiViewport = true, .samplerAnisotropy = true, .textureCompressionETC2 = true, .textureCompressionASTC_LDR = true, @@ -490,8 +490,8 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, features->vulkanMemoryModel = false; features->vulkanMemoryModelDeviceScope = false; features->vulkanMemoryModelAvailabilityVisibilityChains = false; - features->shaderOutputViewportIndex = false; - features->shaderOutputLayer = false; + features->shaderOutputViewportIndex = true; + features->shaderOutputLayer = true; features->subgroupBroadcastDynamicId = false; break; } diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py index 16722a3..c95167e 100644 --- a/src/freedreno/vulkan/tu_extensions.py +++ b/src/freedreno/vulkan/tu_extensions.py @@ -94,6 +94,7 @@ EXTENSIONS = [ Extension('VK_EXT_custom_border_color', 12, True), Extension('VK_KHR_multiview', 1, True), Extension('VK_EXT_host_query_reset', 1, True), + Extension('VK_EXT_shader_viewport_index_layer', 1, True), ] MAX_API_VERSION = VkVersion(MAX_API_VERSION) diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 4b2be41..f2dbe97 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -831,16 +831,22 @@ tu6_emit_vpc(struct tu_cs *cs, ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ); const uint32_t layer_regid = ir3_find_output_regid(last_shader, VARYING_SLOT_LAYER); + const uint32_t view_regid = + ir3_find_output_regid(last_shader, VARYING_SLOT_VIEWPORT); uint32_t primitive_regid = gs ? ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0); uint32_t flags_regid = gs ? ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0; - uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff; + uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff, view_loc = 0xff; if (layer_regid != regid(63, 0)) { layer_loc = linkage.max_loc; ir3_link_add(&linkage, layer_regid, 0x1, linkage.max_loc); } + if (view_regid != regid(63, 0)) { + view_loc = linkage.max_loc; + ir3_link_add(&linkage, view_regid, 0x1, linkage.max_loc); + } if (position_regid != regid(63, 0)) { position_loc = linkage.max_loc; ir3_link_add(&linkage, position_regid, 0xf, linkage.max_loc); @@ -895,6 +901,7 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) | CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) | + CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) | CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID)); tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1); @@ -902,10 +909,12 @@ tu6_emit_vpc(struct tu_cs *cs, A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid)); tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1); - tu_cs_emit(cs, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00); + tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) | + A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc)); tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1); - tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER)); + tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER) | + CONDREG(view_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_VIEW)); tu_cs_emit_regs(cs, A6XX_PC_PRIMID_PASSTHRU(primid_passthru)); @@ -1547,63 +1556,73 @@ tu6_emit_vertex_input(struct tu_cs *cs, } void -tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport) +tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewports, uint32_t num_viewport) { - float offsets[3]; - float scales[3]; - scales[0] = viewport->width / 2.0f; - scales[1] = viewport->height / 2.0f; - scales[2] = viewport->maxDepth - viewport->minDepth; - offsets[0] = viewport->x + scales[0]; - offsets[1] = viewport->y + scales[1]; - offsets[2] = viewport->minDepth; - - VkOffset2D min; - VkOffset2D max; - min.x = (int32_t) viewport->x; - max.x = (int32_t) ceilf(viewport->x + viewport->width); - if (viewport->height >= 0.0f) { - min.y = (int32_t) viewport->y; - max.y = (int32_t) ceilf(viewport->y + viewport->height); - } else { - min.y = (int32_t)(viewport->y + viewport->height); - max.y = (int32_t) ceilf(viewport->y); - } - /* the spec allows viewport->height to be 0.0f */ - if (min.y == max.y) - max.y++; - assert(min.x >= 0 && min.x < max.x); - assert(min.y >= 0 && min.y < max.y); - - VkExtent2D guardband_adj; - guardband_adj.width = fd_calc_guardband(offsets[0], scales[0], false); - guardband_adj.height = fd_calc_guardband(offsets[1], scales[1], false); + VkExtent2D guardband = {511, 511}; + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET(0), num_viewport * 6); + for (uint32_t i = 0; i < num_viewport; i++) { + const VkViewport *viewport = &viewports[i]; + float offsets[3]; + float scales[3]; + scales[0] = viewport->width / 2.0f; + scales[1] = viewport->height / 2.0f; + scales[2] = viewport->maxDepth - viewport->minDepth; + offsets[0] = viewport->x + scales[0]; + offsets[1] = viewport->y + scales[1]; + offsets[2] = viewport->minDepth; + for (uint32_t j = 0; j < 3; j++) { + tu_cs_emit(cs, fui(offsets[j])); + tu_cs_emit(cs, fui(scales[j])); + } - tu_cs_emit_regs(cs, - A6XX_GRAS_CL_VPORT_XOFFSET(0, offsets[0]), - A6XX_GRAS_CL_VPORT_XSCALE(0, scales[0]), - A6XX_GRAS_CL_VPORT_YOFFSET(0, offsets[1]), - A6XX_GRAS_CL_VPORT_YSCALE(0, scales[1]), - A6XX_GRAS_CL_VPORT_ZOFFSET(0, offsets[2]), - A6XX_GRAS_CL_VPORT_ZSCALE(0, scales[2])); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0), 2); - tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(min.x) | + guardband.width = + MIN2(guardband.width, fd_calc_guardband(offsets[0], scales[0], false)); + guardband.height = + MIN2(guardband.height, fd_calc_guardband(offsets[1], scales[1], false)); + } + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0), num_viewport * 2); + for (uint32_t i = 0; i < num_viewport; i++) { + const VkViewport *viewport = &viewports[i]; + VkOffset2D min; + VkOffset2D max; + min.x = (int32_t) viewport->x; + max.x = (int32_t) ceilf(viewport->x + viewport->width); + if (viewport->height >= 0.0f) { + min.y = (int32_t) viewport->y; + max.y = (int32_t) ceilf(viewport->y + viewport->height); + } else { + min.y = (int32_t)(viewport->y + viewport->height); + max.y = (int32_t) ceilf(viewport->y); + } + /* the spec allows viewport->height to be 0.0f */ + if (min.y == max.y) + max.y++; + /* allow viewport->width = 0.0f for un-initialized viewports: */ + if (min.x == max.x) + max.x++; + assert(min.x >= 0 && min.x < max.x); + assert(min.y >= 0 && min.y < max.y); + tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(min.x) | A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(min.y)); - tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(max.x - 1) | + tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(max.x - 1) | A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(max.y - 1)); + } + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_Z_CLAMP(0), num_viewport * 2); + for (uint32_t i = 0; i < num_viewport; i++) { + const VkViewport *viewport = &viewports[i]; + tu_cs_emit(cs, fui(MIN2(viewport->minDepth, viewport->maxDepth))); + tu_cs_emit(cs, fui(MAX2(viewport->minDepth, viewport->maxDepth))); + } tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1); - tu_cs_emit(cs, - A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_adj.width) | - A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_adj.height)); + tu_cs_emit(cs, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband.width) | + A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband.height)); - float z_clamp_min = MIN2(viewport->minDepth, viewport->maxDepth); - float z_clamp_max = MAX2(viewport->minDepth, viewport->maxDepth); - - tu_cs_emit_regs(cs, - A6XX_GRAS_CL_Z_CLAMP_MIN(0, z_clamp_min), - A6XX_GRAS_CL_Z_CLAMP_MAX(0, z_clamp_max)); + /* TODO: what to do about this and multi viewport ? */ + float z_clamp_min = num_viewport ? MIN2(viewports[0].minDepth, viewports[0].maxDepth) : 0; + float z_clamp_max = num_viewport ? MAX2(viewports[0].minDepth, viewports[0].maxDepth) : 0; tu_cs_emit_regs(cs, A6XX_RB_Z_CLAMP_MIN(z_clamp_min), @@ -1611,32 +1630,35 @@ tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport) } void -tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor) +tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissors, uint32_t scissor_count) { - VkOffset2D min = scissor->offset; - VkOffset2D max = { - scissor->offset.x + scissor->extent.width, - scissor->offset.y + scissor->extent.height, - }; + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0), scissor_count * 2); - /* special case for empty scissor with max == 0 to avoid overflow */ - if (max.x == 0) - min.x = max.x = 1; - if (max.y == 0) - min.y = max.y = 1; + for (uint32_t i = 0; i < scissor_count; i++) { + const VkRect2D *scissor = &scissors[i]; - /* avoid overflow with large scissor - * note the max will be limited to min - 1, so that empty scissor works - */ - uint32_t scissor_max = BITFIELD_MASK(15); - min.x = MIN2(scissor_max, min.x); - min.y = MIN2(scissor_max, min.y); - max.x = MIN2(scissor_max, max.x); - max.y = MIN2(scissor_max, max.y); + uint32_t min_x = scissor->offset.x; + uint32_t min_y = scissor->offset.y; + uint32_t max_x = min_x + scissor->extent.width - 1; + uint32_t max_y = min_y + scissor->extent.height - 1; - tu_cs_emit_regs(cs, - A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = min.x, .y = min.y), - A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = max.x - 1, .y = max.y - 1)); + if (!scissor->extent.width || !scissor->extent.height) { + min_x = min_y = 1; + max_x = max_y = 0; + } else { + /* avoid overflow */ + uint32_t scissor_max = BITFIELD_MASK(15); + min_x = MIN2(scissor_max, min_x); + min_y = MIN2(scissor_max, min_y); + max_x = MIN2(scissor_max, max_x); + max_y = MIN2(scissor_max, max_y); + } + + tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_X(min_x) | + A6XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(min_y)); + tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_BR_X(max_x) | + A6XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(max_y)); + } } void @@ -2034,9 +2056,16 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, builder->shaders[stage] = shader; } - struct tu_shader *gs = builder->shaders[MESA_SHADER_GEOMETRY]; - key.layer_zero = - !gs || !(gs->ir3_shader->nir->info.outputs_written & VARYING_SLOT_LAYER); + struct tu_shader *last_shader = builder->shaders[MESA_SHADER_GEOMETRY]; + if (!last_shader) + last_shader = builder->shaders[MESA_SHADER_TESS_EVAL]; + if (!last_shader) + last_shader = builder->shaders[MESA_SHADER_VERTEX]; + + uint64_t outputs_written = last_shader->ir3_shader->nir->info.outputs_written; + + key.layer_zero = !(outputs_written & VARYING_BIT_LAYER); + key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT); pipeline->tess.patch_type = key.tessellation; @@ -2254,11 +2283,11 @@ tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder, struct tu_cs cs; - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_VIEWPORT, 18)) - tu6_emit_viewport(&cs, vp_info->pViewports); + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * vp_info->viewportCount)) + tu6_emit_viewport(&cs, vp_info->pViewports, vp_info->viewportCount); - if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_SCISSOR, 3)) - tu6_emit_scissor(&cs, vp_info->pScissors); + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_SCISSOR, 1 + 2 * vp_info->scissorCount)) + tu6_emit_scissor(&cs, vp_info->pScissors, vp_info->scissorCount); } static void diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 2902050..811ade7 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -85,7 +85,7 @@ typedef uint32_t xcb_window_t; #define MAX_VERTEX_ATTRIBS 32 #define MAX_RTS 8 #define MAX_VSC_PIPES 32 -#define MAX_VIEWPORTS 1 +#define MAX_VIEWPORTS 16 #define MAX_SCISSORS 16 #define MAX_DISCARD_RECTANGLES 4 #define MAX_PUSH_CONSTANTS_SIZE 128 @@ -859,11 +859,17 @@ struct tu_cmd_state struct tu_pipeline *pipeline; struct tu_pipeline *compute_pipeline; - /* Vertex buffers */ + /* Vertex buffers, viewports, and scissors + * the states for these can be updated partially, so we need to save these + * to be able to emit a complete draw state + */ struct { uint64_t base; uint32_t size; } vb[MAX_VBS]; + VkViewport viewport[MAX_VIEWPORTS]; + VkRect2D scissor[MAX_SCISSORS]; + uint32_t max_viewport, max_scissor; /* for dynamic states that can't be emitted directly */ uint32_t dynamic_stencil_mask; @@ -1115,10 +1121,10 @@ struct tu_pipeline }; void -tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport); +tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport, uint32_t num_viewport); void -tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor); +tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scs, uint32_t scissor_count); void tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc); diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 188bd72..4348ea1 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -68,6 +68,7 @@ tu_spirv_to_nir(struct ir3_compiler *compiler, .variable_pointers = true, .stencil_export = true, .multiview = true, + .shader_viewport_index_layer = true, }, }; const nir_shader_compiler_options *nir_options = -- 2.7.4