From: Matt Coster
Date: Mon, 27 Feb 2023 12:33:31 +0000 (+0000)
Subject: pvr: Implement ZLS subtile alignment
X-Git-Tag: upstream/23.3.3~7665
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d1b17a5edcd493630738681b8f104123eb92710b;p=platform%2Fupstream%2Fmesa.git

pvr: Implement ZLS subtile alignment

This is a workaround for the edge case where a depth buffer is smaller
than a single tile size.

Signed-off-by: Matt Coster
Reviewed-by: Karmjit Mahil
Part-of:
---

diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index 1075d49..f1902aa 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -1087,33 +1087,180 @@ pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass,
  * to the nearest power-of-two (which will be tile-aligned). If the attachment
  * is not twiddled, we don't need to worry about tile-alignment at all.
  */
-static void
-pvr_sub_cmd_gfx_align_zls_subtiles(const struct pvr_device_info *dev_info,
-                                   const struct pvr_render_job *job,
-                                   const struct pvr_image *image)
+static bool pvr_sub_cmd_gfx_requires_ds_subtile_alignment(
+   const struct pvr_device_info *dev_info,
+   const struct pvr_render_job *job)
 {
+   const struct pvr_image *const ds_image =
+      pvr_image_view_get_image(job->ds.iview);
    uint32_t zls_tile_size_x;
    uint32_t zls_tile_size_y;
 
    rogue_get_zls_tile_size_xy(dev_info, &zls_tile_size_x, &zls_tile_size_y);
 
-   if (image->physical_extent.width >= zls_tile_size_x &&
-       image->physical_extent.height >= zls_tile_size_y) {
-      return;
+   if (ds_image->physical_extent.width >= zls_tile_size_x &&
+       ds_image->physical_extent.height >= zls_tile_size_y) {
+      return false;
    }
 
+   /* If we have the zls_subtile feature, we can skip the alignment iff:
+    *  - The attachment is not multisampled, and
+    *  - The depth and stencil attachments are the same.
+    */
    if (PVR_HAS_FEATURE(dev_info, zls_subtile) &&
-       image->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
-       (job->has_stencil_attachment || !job->has_depth_attachment)) {
-      return;
+       ds_image->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
+       job->has_stencil_attachment == job->has_depth_attachment) {
+      return false;
+   }
+
+   /* No ZLS functions enabled; nothing to do. */
+   if ((!job->has_depth_attachment && !job->has_stencil_attachment) ||
+       (!job->ds.load && !job->ds.store)) {
+      return false;
+   }
+
+   return true;
+}
+
+static VkResult
+pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer *const cmd_buffer,
+                                  struct pvr_sub_cmd_gfx *const gfx_sub_cmd)
+{
+   struct pvr_sub_cmd *const prev_sub_cmd =
+      container_of(gfx_sub_cmd, struct pvr_sub_cmd, gfx);
+   struct pvr_ds_attachment *const ds = &gfx_sub_cmd->job.ds;
+   const struct pvr_image *const ds_image = pvr_image_view_get_image(ds->iview);
+   const VkFormat copy_format = pvr_get_raw_copy_format(ds_image->vk.format);
+
+   struct pvr_suballoc_bo *buffer;
+   uint32_t buffer_layer_size;
+   VkBufferImageCopy2 region;
+   VkExtent2D zls_tile_size;
+   VkExtent2D rounded_size;
+   uint32_t buffer_size;
+   VkExtent2D scale;
+   VkResult result;
+
+   /* The operations below assume the last command in the buffer was the target
+    * gfx subcommand. Assert that this is the case.
+    */
+   assert(list_last_entry(&cmd_buffer->sub_cmds, struct pvr_sub_cmd, link) ==
+          prev_sub_cmd);
+
+   if (!ds->load && !ds->store)
+      return VK_SUCCESS;
+
+   rogue_get_zls_tile_size_xy(&cmd_buffer->device->pdevice->dev_info,
+                              &zls_tile_size.width,
+                              &zls_tile_size.height);
+   rogue_get_isp_scale_xy_from_samples(ds_image->vk.samples,
+                                       &scale.width,
+                                       &scale.height);
+
+   rounded_size = (VkExtent2D){
+      .width = ALIGN_POT(ds_image->physical_extent.width, zls_tile_size.width),
+      .height =
+         ALIGN_POT(ds_image->physical_extent.height, zls_tile_size.height),
+   };
+
+   buffer_layer_size = vk_format_get_blocksize(ds_image->vk.format) *
+                       rounded_size.width * rounded_size.height * scale.width *
+                       scale.height;
+
+   if (ds->iview->vk.layer_count > 1)
+      buffer_layer_size = ALIGN_POT(buffer_layer_size, ds_image->alignment);
+
+   buffer_size = buffer_layer_size * ds->iview->vk.layer_count;
+
+   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
+                                     cmd_buffer->device->heaps.general_heap,
+                                     buffer_size,
+                                     0,
+                                     &buffer);
+   if (result != VK_SUCCESS)
+      return result;
+
+   region = (VkBufferImageCopy2){
+      .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
+      .pNext = NULL,
+      .bufferOffset = 0,
+      .bufferRowLength = rounded_size.width,
+      .bufferImageHeight = 0,
+      .imageSubresource = {
+         .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
+         .mipLevel = ds->iview->vk.base_mip_level,
+         .baseArrayLayer = ds->iview->vk.base_array_layer,
+         .layerCount = ds->iview->vk.layer_count,
+      },
+      .imageOffset = { 0 },
+      .imageExtent = {
+         .width = ds->iview->vk.extent.width,
+         .height = ds->iview->vk.extent.height,
+         .depth = 1,
+      },
+   };
+
+   if (ds->load) {
+      result =
+         pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
+      if (result != VK_SUCCESS)
+         return result;
+
+      result = pvr_copy_image_to_buffer_region_format(cmd_buffer,
+                                                      ds_image,
+                                                      buffer->dev_addr,
+                                                      &region,
+                                                      copy_format,
+                                                      copy_format);
+      if (result != VK_SUCCESS)
+         return result;
+
+      cmd_buffer->state.current_sub_cmd->transfer.serialize_with_frag = true;
+
+      result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+      if (result != VK_SUCCESS)
+         return result;
+
+      /* Now we have to fiddle with cmd_buffer to place this transfer command
+       * *before* the target gfx subcommand.
+       */
+      list_move_to(&cmd_buffer->state.current_sub_cmd->link,
+                   &prev_sub_cmd->link);
+   }
+
+   if (ds->store) {
+      result =
+         pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
+      if (result != VK_SUCCESS)
+         return result;
+
+      result = pvr_copy_buffer_to_image_region_format(cmd_buffer,
+                                                      buffer->dev_addr,
+                                                      ds_image,
+                                                      &region,
+                                                      copy_format,
+                                                      copy_format,
+                                                      0);
+      if (result != VK_SUCCESS)
+         return result;
+
+      cmd_buffer->state.current_sub_cmd->transfer.serialize_with_frag = true;
+
+      result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+      if (result != VK_SUCCESS)
+         return result;
    }
 
-   pvr_finishme("Unaligned ZLS subtile");
-   mesa_logd("Image: %ux%u ZLS tile: %ux%u\n",
-             image->physical_extent.width,
-             image->physical_extent.height,
-             zls_tile_size_x,
-             zls_tile_size_y);
+   /* Finally, patch up the target graphics sub_cmd to use the correctly-strided
+    * buffer.
+    */
+   ds->has_alignment_transfers = true;
+   ds->addr = buffer->dev_addr;
+   ds->physical_extent = rounded_size;
+
+   gfx_sub_cmd->wait_on_previous_transfer = true;
+
+   return VK_SUCCESS;
 }
 
 struct pvr_emit_state {
@@ -1320,15 +1467,18 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
       bool d_store = false, s_store = false;
       bool d_load = false, s_load = false;
 
+      job->ds.iview = ds_iview;
       job->ds.addr = ds_image->dev_addr;
 
       job->ds.stride = pvr_stride_from_pitch(level_pitch, ds_iview->vk.format);
       job->ds.height = ds_iview->vk.extent.height;
-      job->ds.physical_width = u_minify(ds_image->physical_extent.width,
-                                        ds_iview->vk.base_mip_level);
-      job->ds.physical_height = u_minify(ds_image->physical_extent.height,
-                                         ds_iview->vk.base_mip_level);
+      job->ds.physical_extent = (VkExtent2D){
+         .width = u_minify(ds_image->physical_extent.width,
+                           ds_iview->vk.base_mip_level),
+         .height = u_minify(ds_image->physical_extent.height,
+                            ds_iview->vk.base_mip_level),
+      };
       job->ds.layer_size = ds_image->layer_size;
 
       job->ds_clear_value = default_ds_clear_value;
@@ -1409,9 +1559,13 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
 
          if (job->ds.load || job->ds.store || store_was_optimised_out)
            job->process_empty_tiles = true;
-      }
-
-      pvr_sub_cmd_gfx_align_zls_subtiles(dev_info, job, image);
+
+         if (pvr_sub_cmd_gfx_requires_ds_subtile_alignment(dev_info, job)) {
+            result = pvr_sub_cmd_gfx_align_ds_subtiles(cmd_buffer, sub_cmd);
+            if (result != VK_SUCCESS)
+               return result;
+         }
+      }
    } else {
       job->has_depth_attachment = false;
       job->has_stencil_attachment = false;
diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c
index d9c54b6..f57d55e 100644
--- a/src/imagination/vulkan/pvr_job_render.c
+++ b/src/imagination/vulkan/pvr_job_render.c
@@ -1340,21 +1340,29 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
 
    pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) {
       if (job->has_depth_attachment) {
-         uint32_t aligned_width =
-            ALIGN_POT(job->ds.physical_width, ROGUE_IPF_TILE_SIZE_PIXELS);
-         uint32_t aligned_height =
-            ALIGN_POT(job->ds.physical_height, ROGUE_IPF_TILE_SIZE_PIXELS);
-
-         rogue_get_isp_num_tiles_xy(dev_info,
-                                    job->samples,
-                                    aligned_width,
-                                    aligned_height,
-                                    &value.zlsextent_x_z,
-                                    &value.zlsextent_y_z);
+         uint32_t alignment_x;
+         uint32_t alignment_y;
+
+         if (job->ds.has_alignment_transfers) {
+            rogue_get_zls_tile_size_xy(dev_info, &alignment_x, &alignment_y);
+         } else {
+            alignment_x = ROGUE_IPF_TILE_SIZE_PIXELS;
+            alignment_y = ROGUE_IPF_TILE_SIZE_PIXELS;
+         }
+
+         rogue_get_isp_num_tiles_xy(
+            dev_info,
+            job->samples,
+            ALIGN_POT(job->ds.physical_extent.width, alignment_x),
+            ALIGN_POT(job->ds.physical_extent.height, alignment_y),
+            &value.zlsextent_x_z,
+            &value.zlsextent_y_z);
+
          value.zlsextent_x_z -= 1;
         value.zlsextent_y_z -= 1;
 
-         if (job->ds.memlayout == PVR_MEMLAYOUT_TWIDDLED) {
+         if (job->ds.memlayout == PVR_MEMLAYOUT_TWIDDLED &&
+             !job->ds.has_alignment_transfers) {
            value.loadtwiddled = true;
            value.storetwiddled = true;
         }
@@ -1380,10 +1388,10 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
         }
 
         value.zloaden = job->ds.load;
-        value.forcezload = job->ds.load;
+        value.forcezload = value.zloaden;
 
         value.zstoreen = job->ds.store;
-        value.forcezstore = job->ds.store;
+        value.forcezstore = value.zstoreen;
 
         zload_format = value.zloadformat;
      } else {
@@ -1392,10 +1400,10 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
 
      if (job->has_stencil_attachment) {
         value.sstoreen = job->ds.store;
-        value.forcezstore = job->ds.store;
+        value.forcezstore = value.sstoreen;
 
         value.sloaden = job->ds.load;
-        value.forcezload = job->ds.load;
+        value.forcezload = value.sloaden;
      }
   }
   stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
@@ -1570,8 +1578,13 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
    if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
       pvr_csb_pack (stream_ptr, CR_ISP_ZLS_PIXELS, value) {
          if (job->has_depth_attachment) {
-            value.x = job->ds.stride - 1;
-            value.y = job->ds.height - 1;
+            if (job->ds.has_alignment_transfers) {
+               value.x = job->ds.physical_extent.width - 1;
+               value.y = job->ds.physical_extent.height - 1;
+            } else {
+               value.x = job->ds.stride - 1;
+               value.y = job->ds.height - 1;
+            }
          }
       }
       stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
diff --git a/src/imagination/vulkan/pvr_job_render.h b/src/imagination/vulkan/pvr_job_render.h
index e4208a2..b82deb7 100644
--- a/src/imagination/vulkan/pvr_job_render.h
+++ b/src/imagination/vulkan/pvr_job_render.h
@@ -78,8 +78,7 @@ struct pvr_render_job {
       pvr_dev_addr_t addr;
       uint32_t stride;
       uint32_t height;
-      uint32_t physical_width;
-      uint32_t physical_height;
+      VkExtent2D physical_extent;
       uint32_t layer_size;
       VkFormat vk_format;
       /* FIXME: This should be of type 'enum pvr_memlayout', but this is defined
@@ -91,6 +90,12 @@ struct pvr_render_job {
        * included by both this header and pvr_private.h.
        */
       uint32_t memlayout;
+
+      /* TODO: Is this really necessary? Maybe we can extract all useful
+       * information and drop this member. */
+      const struct pvr_image_view *iview;
+
+      bool has_alignment_transfers;
    } ds;
 
    VkClearDepthStencilValue ds_clear_value;
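
For readers unfamiliar with this path, the core of the workaround is the buffer
sizing in pvr_sub_cmd_gfx_align_ds_subtiles(): the depth/stencil extent is
rounded up to the ZLS tile size, scaled by the per-sample ISP factors and
multiplied by the texel block size, giving a tile-aligned staging buffer that
the extra transfer sub-commands copy through. The standalone C sketch below is
illustrative only and is not part of the patch: align_pot() stands in for
Mesa's ALIGN_POT macro, and the tile size, block size and sample scale used in
main() are assumed example values.

#include <stdint.h>
#include <stdio.h>

/* Round v up to the next multiple of a; a must be a power of two
 * (mirrors Mesa's ALIGN_POT macro). */
static inline uint64_t align_pot(uint64_t v, uint64_t a)
{
   return (v + (a - 1)) & ~(a - 1);
}

/* Size of the intermediate, tile-aligned depth/stencil staging buffer:
 * round the image extent up to the ZLS tile size, multiply by the
 * per-sample ISP scale and the texel block size, then align each layer
 * for layered attachments. */
static uint64_t ds_subtile_buffer_size(uint32_t width, uint32_t height,
                                       uint32_t zls_tile_w, uint32_t zls_tile_h,
                                       uint32_t scale_x, uint32_t scale_y,
                                       uint32_t block_size,
                                       uint32_t layer_count,
                                       uint32_t layer_alignment)
{
   const uint64_t rounded_w = align_pot(width, zls_tile_w);
   const uint64_t rounded_h = align_pot(height, zls_tile_h);

   uint64_t layer_size = block_size * rounded_w * rounded_h * scale_x * scale_y;

   if (layer_count > 1)
      layer_size = align_pot(layer_size, layer_alignment);

   return layer_size * layer_count;
}

int main(void)
{
   /* Hypothetical example: a 13x9 single-sample, single-layer attachment with
    * 4-byte texels and an assumed 32x32 ZLS tile rounds up to one full tile. */
   printf("%llu bytes\n",
          (unsigned long long)ds_subtile_buffer_size(13, 9, 32, 32, 1, 1, 4, 1, 1));
   return 0;
}

With those example values the sketch prints 4096 bytes, i.e. exactly one 32x32
tile of 4-byte texels, which is the buffer the load/store transfers above would
round-trip through.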