From bd56ce8ce5045a181b9fc8a7f24251931e343f9c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 12 Jan 2019 18:30:47 -0600 Subject: [PATCH] anv: Implement VK_KHR_shader_atomic_int64 Reviewed-by: Lionel Landwerlin Reviewed-by: Caio Marcelo de Oliveira Filho --- src/intel/compiler/brw_eu_defines.h | 1 + src/intel/compiler/brw_fs.cpp | 12 +++++++++++- src/intel/compiler/brw_fs_nir.cpp | 17 +++++++++++++++-- src/intel/compiler/brw_shader.cpp | 3 +++ src/intel/vulkan/anv_device.c | 8 ++++++++ src/intel/vulkan/anv_extensions.py | 2 ++ src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 12 ++++++++++-- src/intel/vulkan/anv_pipeline.c | 1 + 8 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 3ce7cca..da72330 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -423,6 +423,7 @@ enum opcode { SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL, SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL, SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, + SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL, SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL, SHADER_OPCODE_TYPED_ATOMIC_LOGICAL, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 0a390e5..9f82946 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -855,6 +855,7 @@ fs_inst::components_read(unsigned i) const return i == 1 ? src[2].ud : 1; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: assert(src[2].file == IMM); if (i == 1) { /* Data source */ @@ -5298,7 +5299,7 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst) if (devinfo->gen >= 9) { /* On Skylake and above, we have SENDS */ mlen = 2 * (inst->exec_size / 8); - ex_mlen = src_comps * (inst->exec_size / 8); + ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE; payload = retype(bld.move_to_vgrf(addr, 1), BRW_REGISTER_TYPE_UD); payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_REGISTER_TYPE_UD); @@ -5350,6 +5351,13 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst) !inst->dst.is_null()); break; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: + desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 64, + arg, /* atomic_op */ + !inst->dst.is_null()); + break; + + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size, arg, /* atomic_op */ @@ -5558,6 +5566,7 @@ fs_visitor::lower_logical_sends() case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: lower_a64_logical_send(ibld, inst); break; @@ -6147,6 +6156,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, return devinfo->gen <= 8 ? 8 : MIN2(16, inst->exec_size); case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: return 8; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index cf044a3..4bf85bf 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4928,6 +4928,13 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, if (stage == MESA_SHADER_FRAGMENT) brw_wm_prog_data(prog_data)->has_side_effects = true; + /* The BTI untyped atomic messages only support 32-bit atomics. If you + * just look at the big table of messages in the Vol 7 of the SKL PRM, they + * appear to exist. However, if you look at Vol 2a, there are no message + * descriptors provided for Qword atomic ops except for A64 messages. + */ + assert(nir_dest_bit_size(instr->dest) == 32); + fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); @@ -5092,8 +5099,14 @@ fs_visitor::nir_emit_global_atomic(const fs_builder &bld, data = tmp; } - bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, - dest, addr, data, brw_imm_ud(op)); + if (nir_dest_bit_size(instr->dest) == 64) { + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL, + dest, addr, data, brw_imm_ud(op)); + } else { + assert(nir_dest_bit_size(instr->dest) == 32); + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, + dest, addr, data, brw_imm_ud(op)); + } } void diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index f7ed108..1f98bd0 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -308,6 +308,8 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "a64_byte_scattered_write_logical"; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: return "a64_untyped_atomic_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: + return "a64_untyped_atomic_int64_logical"; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: return "a64_untyped_atomic_float_logical"; case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: @@ -1044,6 +1046,7 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index de56926..d56e359 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1051,6 +1051,14 @@ void anv_GetPhysicalDeviceFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: { + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features = (void *)ext; + features->shaderBufferInt64Atomics = + pdevice->info.gen >= 9 && pdevice->use_softpin; + features->shaderSharedInt64Atomics = VK_FALSE; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { VkPhysicalDeviceShaderDrawParametersFeatures *features = (void *)ext; features->shaderDrawParameters = true; diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py index 9d398f1..d937e4e 100644 --- a/src/intel/vulkan/anv_extensions.py +++ b/src/intel/vulkan/anv_extensions.py @@ -104,6 +104,8 @@ EXTENSIONS = [ Extension('VK_KHR_relaxed_block_layout', 1, True), Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True), Extension('VK_KHR_sampler_ycbcr_conversion', 1, True), + Extension('VK_KHR_shader_atomic_int64', 1, + 'device->info.gen >= 9 && device->use_softpin'), Extension('VK_KHR_shader_draw_parameters', 1, True), Extension('VK_KHR_shader_float16_int8', 1, 'device->info.gen >= 8'), Extension('VK_KHR_storage_buffer_storage_class', 1, True), diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 356a56e..9d25b94 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -253,7 +253,7 @@ build_index_offset_for_deref(nir_deref_instr *deref, } static bool -try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, +try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, bool is_atomic, struct apply_pipeline_layout_state *state) { nir_builder *b = &state->builder; @@ -262,6 +262,12 @@ try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, if (deref->mode != nir_var_mem_ssbo) return false; + /* 64-bit atomics only support A64 messages so we can't lower them to the + * index+offset model. + */ + if (is_atomic && nir_dest_bit_size(intrin->dest) == 64) + return false; + if (!nir_deref_find_descriptor(deref, state)) return false; @@ -286,6 +292,8 @@ lower_direct_buffer_access(nir_function_impl *impl, switch (intrin->intrinsic) { case nir_intrinsic_load_deref: case nir_intrinsic_store_deref: + try_lower_direct_buffer_intrinsic(intrin, false, state); + break; case nir_intrinsic_deref_atomic_add: case nir_intrinsic_deref_atomic_imin: case nir_intrinsic_deref_atomic_umin: @@ -299,7 +307,7 @@ lower_direct_buffer_access(nir_function_impl *impl, case nir_intrinsic_deref_atomic_fmin: case nir_intrinsic_deref_atomic_fmax: case nir_intrinsic_deref_atomic_fcomp_swap: - try_lower_direct_buffer_intrinsic(intrin, state); + try_lower_direct_buffer_intrinsic(intrin, true, state); break; case nir_intrinsic_get_buffer_size: { diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index b0ed218..09abf4e 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -147,6 +147,7 @@ anv_shader_compile_to_nir(struct anv_device *device, .int8 = pdevice->info.gen >= 8, .int16 = pdevice->info.gen >= 8, .int64 = pdevice->info.gen >= 8, + .int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin, .min_lod = true, .multiview = true, .physical_storage_buffer_address = pdevice->has_a64_buffer_access, -- 2.7.4