From 983af3a6d11d8d1dcd3c23514f92abceb664642f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 29 Nov 2016 23:52:55 +0000 Subject: [PATCH] radv: add a compute shader implementation for buffer to image This implements the reverse of the current image->buffer path and can be used when we need to do image transfer on compute queues. This just adds the code turned off, as we don't support separate compute queues yet, and we don't want to use this path on the GFX queues for DCC reasons. Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_meta.h | 6 + src/amd/vulkan/radv_meta_bufimage.c | 302 +++++++++++++++++++++++++++++++++++- src/amd/vulkan/radv_meta_copy.c | 23 ++- 3 files changed, 325 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h index 97d020c..aa0d30c 100644 --- a/src/amd/vulkan/radv_meta.h +++ b/src/amd/vulkan/radv_meta.h @@ -166,6 +166,12 @@ void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, unsigned num_rects, struct radv_meta_blit2d_rect *rects); +void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, + struct radv_meta_blit2d_buffer *src, + struct radv_meta_blit2d_surf *dst, + unsigned num_rects, + struct radv_meta_blit2d_rect *rects); + void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageSubresourceRange *subresourceRange); diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c index 3107718..efe02cc 100644 --- a/src/amd/vulkan/radv_meta_bufimage.c +++ b/src/amd/vulkan/radv_meta_bufimage.c @@ -225,10 +225,206 @@ radv_device_finish_meta_itob_state(struct radv_device *device) } } +static nir_shader * +build_nir_btoi_compute_shader(struct radv_device *dev) +{ + nir_builder b; + const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, + false, + false, + GLSL_TYPE_FLOAT); + const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, + false, + false, + 
GLSL_TYPE_FLOAT); + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info->name = ralloc_strdup(b.shader, "meta_btoi_cs"); + b.shader->info->cs.local_size[0] = 16; + b.shader->info->cs.local_size[1] = 16; + b.shader->info->cs.local_size[2] = 1; + nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, + buf_type, "s_tex"); + input_img->data.descriptor_set = 0; + input_img->data.binding = 0; + + nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, + img_type, "out_img"); + output_img->data.descriptor_set = 0; + output_img->data.binding = 1; + + nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); + nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *block_size = nir_imm_ivec4(&b, + b.shader->info->cs.local_size[0], + b.shader->info->cs.local_size[1], + b.shader->info->cs.local_size[2], 0); + + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + + nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); + offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + offset->num_components = 2; + nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset"); + nir_builder_instr_insert(&b, &offset->instr); + + nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); + stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8)); + stride->num_components = 1; + nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride"); + nir_builder_instr_insert(&b, &stride->instr); + + nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); + nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); + + nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa); + tmp = nir_iadd(&b, tmp, pos_x); + + nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp); + + nir_ssa_def *img_coord = nir_iadd(&b, global_id, 
&offset->dest.ssa); + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); + tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; + tex->op = nir_texop_txf; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(buf_coord); + tex->src[1].src_type = nir_tex_src_lod; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 1; + tex->texture = nir_deref_var_create(tex, input_img); + tex->sampler = NULL; + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_ssa_def *outval = &tex->dest.ssa; + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store); + store->src[0] = nir_src_for_ssa(img_coord); + store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); + store->src[2] = nir_src_for_ssa(outval); + store->variables[0] = nir_deref_var_create(store, output_img); + + nir_builder_instr_insert(&b, &store->instr); + return b.shader; +} + +/* Buffer to image - the image is written with image store accessors */ +static VkResult +radv_device_init_meta_btoi_state(struct radv_device *device) +{ + VkResult result; + struct radv_shader_module cs = { .nir = NULL }; + + zero(device->meta_state.btoi); + + cs.nir = build_nir_btoi_compute_shader(device); + + /* + * two descriptors: one for the buffer being sampled, + * one for the image being written. 
+ */ + VkDescriptorSetLayoutCreateInfo ds_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + } + }; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), + &ds_create_info, + &device->meta_state.alloc, + &device->meta_state.btoi.img_ds_layout); + if (result != VK_SUCCESS) + goto fail; + + + VkPipelineLayoutCreateInfo pl_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.btoi.img_ds_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12}, + }; + + result = radv_CreatePipelineLayout(radv_device_to_handle(device), + &pl_create_info, + &device->meta_state.alloc, + &device->meta_state.btoi.img_p_layout); + if (result != VK_SUCCESS) + goto fail; + + /* compute shader */ + + VkPipelineShaderStageCreateInfo pipeline_shader_stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = radv_shader_module_to_handle(&cs), + .pName = "main", + .pSpecializationInfo = NULL, + }; + + VkComputePipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = pipeline_shader_stage, + .flags = 0, + .layout = device->meta_state.btoi.img_p_layout, + }; + + result = radv_CreateComputePipelines(radv_device_to_handle(device), + radv_pipeline_cache_to_handle(&device->meta_state.cache), + 1, &vk_pipeline_info, NULL, + &device->meta_state.btoi.pipeline); + if 
(result != VK_SUCCESS) + goto fail; + + ralloc_free(cs.nir); + return VK_SUCCESS; +fail: + ralloc_free(cs.nir); + return result; +} + +static void +radv_device_finish_meta_btoi_state(struct radv_device *device) +{ + if (device->meta_state.btoi.img_p_layout) { + radv_DestroyPipelineLayout(radv_device_to_handle(device), + device->meta_state.btoi.img_p_layout, + &device->meta_state.alloc); + } + if (device->meta_state.btoi.img_ds_layout) { + radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), + device->meta_state.btoi.img_ds_layout, + &device->meta_state.alloc); + } + if (device->meta_state.btoi.pipeline) { + radv_DestroyPipeline(radv_device_to_handle(device), + device->meta_state.btoi.pipeline, + &device->meta_state.alloc); + } +} + void radv_device_finish_meta_bufimage_state(struct radv_device *device) { radv_device_finish_meta_itob_state(device); + radv_device_finish_meta_btoi_state(device); } VkResult @@ -239,6 +435,12 @@ radv_device_init_meta_bufimage_state(struct radv_device *device) result = radv_device_init_meta_itob_state(device); if (result != VK_SUCCESS) return result; + + result = radv_device_init_meta_btoi_state(device); + if (result != VK_SUCCESS) { + radv_device_finish_meta_itob_state(device); + return result; + } return VK_SUCCESS; } @@ -351,7 +553,7 @@ itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, } static void -bind_pipeline(struct radv_cmd_buffer *cmd_buffer) +itob_bind_pipeline(struct radv_cmd_buffer *cmd_buffer) { VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline; @@ -376,7 +578,7 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &temps.dst_bview); itob_bind_descriptors(cmd_buffer, &temps); - bind_pipeline(cmd_buffer); + itob_bind_pipeline(cmd_buffer); for (unsigned r = 0; r < num_rects; ++r) { unsigned push_constants[3] = { @@ -393,3 +595,99 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, } 
radv_temp_descriptor_set_destroy(cmd_buffer->device, temps.set); } + +struct btoi_temps { + struct radv_buffer_view src_bview; + struct radv_image_view dst_iview; + VkDescriptorSet set; +}; + +static void +btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, + struct btoi_temps *tmp) +{ + struct radv_device *device = cmd_buffer->device; + VkDevice vk_device = radv_device_to_handle(cmd_buffer->device); + + radv_temp_descriptor_set_create(device, cmd_buffer, + device->meta_state.btoi.img_ds_layout, + &tmp->set); + + radv_UpdateDescriptorSets(vk_device, + 2, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->src_bview) }, + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = radv_image_view_to_handle(&tmp->dst_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.btoi.img_p_layout, 0, 1, + &tmp->set, 0, NULL); +} + +static void +btoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer) +{ + VkPipeline pipeline = + cmd_buffer->device->meta_state.btoi.pipeline; + + if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) { + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + } +} + +void +radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, + struct radv_meta_blit2d_buffer *src, + struct radv_meta_blit2d_surf *dst, + unsigned num_rects, + 
struct radv_meta_blit2d_rect *rects) +{ + struct radv_device *device = cmd_buffer->device; + struct btoi_temps temps; + + create_bview(cmd_buffer, src->buffer, src->offset, src->format, &temps.src_bview); + create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview); + btoi_bind_descriptors(cmd_buffer, &temps); + + btoi_bind_pipeline(cmd_buffer); + + for (unsigned r = 0; r < num_rects; ++r) { + unsigned push_constants[3] = { + rects[r].dst_x, + rects[r].dst_y, + src->pitch + }; + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.btoi.img_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 12, + push_constants); + + radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); + } + radv_temp_descriptor_set_destroy(cmd_buffer->device, temps.set); +} diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c index d81fc48..2073a2c 100644 --- a/src/amd/vulkan/radv_meta_copy.c +++ b/src/amd/vulkan/radv_meta_copy.c @@ -100,6 +100,11 @@ blit_surf_for_image_level_layer(struct radv_image *image, }; } +union meta_saved_state { + struct radv_meta_saved_state gfx; + struct radv_meta_saved_compute_state compute; +}; + static void meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer* buffer, @@ -107,14 +112,18 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, uint32_t regionCount, const VkBufferImageCopy* pRegions) { - struct radv_meta_saved_state saved_state; + bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE; + union meta_saved_state saved_state; /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to * VK_SAMPLE_COUNT_1_BIT." 
*/ assert(image->samples == 1); - radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); + if (cs) + radv_meta_begin_bufimage(cmd_buffer, &saved_state.compute); + else + radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer); for (unsigned r = 0; r < regionCount; r++) { @@ -172,7 +181,10 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, /* Perform Blit */ - radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect); + if (cs) + radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect); + else + radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect); /* Once we've done the blit, all of the actual information about * the image is embedded in the command buffer so we can just @@ -188,7 +200,10 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, slice_array++; } } - radv_meta_restore(&saved_state, cmd_buffer); + if (cs) + radv_meta_end_bufimage(cmd_buffer, &saved_state.compute); + else + radv_meta_restore(&saved_state.gfx, cmd_buffer); } void radv_CmdCopyBufferToImage( -- 2.7.4