From 7ffcaf91876528cc047d76e7d424a21fe395aa8b Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Fri, 25 Feb 2022 18:21:00 +0100 Subject: [PATCH] aco: Implement image_store d16. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Georg Lehmann Reviewed-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 28 +++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 7a91d9e..96f6e32 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6050,12 +6050,15 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) const enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); bool is_array = nir_intrinsic_image_array(instr); Temp data = get_ssa_temp(ctx, instr->src[3].ssa); + bool d16 = instr->src[3].ssa->bit_size == 16; /* only R64_UINT and R64_SINT supported */ if (instr->src[3].ssa->bit_size == 64 && data.bytes() > 8) data = emit_extract_vector(ctx, data, 0, RegClass(data.type(), 2)); data = as_vgpr(ctx, data); + uint32_t num_components = d16 ? instr->src[3].ssa->num_components : data.size(); + memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0); unsigned access = nir_intrinsic_access(instr); bool glc = ctx->options->chip_class == GFX6 || @@ -6067,12 +6070,22 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1); aco_opcode opcode; - switch (data.size()) { - case 1: opcode = aco_opcode::buffer_store_format_x; break; - case 2: opcode = aco_opcode::buffer_store_format_xy; break; - case 3: opcode = aco_opcode::buffer_store_format_xyz; break; - case 4: opcode = aco_opcode::buffer_store_format_xyzw; break; - default: unreachable(">4 channel buffer image store"); + if (!d16) { + switch (num_components) { + case 1: opcode = aco_opcode::buffer_store_format_x; break; + case 2: opcode = aco_opcode::buffer_store_format_xy; break; + case 3: opcode = aco_opcode::buffer_store_format_xyz; break; + case 4: opcode = aco_opcode::buffer_store_format_xyzw; break; + default: unreachable(">4 channel buffer image store"); + } + } else { + switch (num_components) { + case 1: opcode = aco_opcode::buffer_store_format_d16_x; break; + case 2: opcode = aco_opcode::buffer_store_format_d16_xy; break; + case 3: opcode = aco_opcode::buffer_store_format_d16_xyz; break; + case 4: opcode = aco_opcode::buffer_store_format_d16_xyzw; break; + default: unreachable(">4 channel buffer image store"); + } } aco_ptr store{ create_instruction(opcode, Format::MUBUF, 4, 0)}; @@ -6097,7 +6110,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0; aco_opcode opcode = level_zero ? aco_opcode::image_store : aco_opcode::image_store_mip; - uint32_t dmask = BITFIELD_MASK(data.size()); + uint32_t dmask = BITFIELD_MASK(num_components); /* remove zero/undef elements from data, components which aren't in dmask * are zeroed anyway */ @@ -6136,6 +6149,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) store->glc = glc; store->dlc = false; store->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array); + store->d16 = d16; store->dmask = dmask; store->unrm = true; store->da = should_declare_array(ctx, dim, is_array); -- 2.7.4