From e56d8b0b2e0022f72875f8850476b3ec0a0df742 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 15 Dec 2021 11:18:03 +0100 Subject: [PATCH] aco: use explicit zero-padding for 64bit image loads in expand_vector() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Previously, this only worked because of regClass mismatches in the allocated vector. Reviewed-by: Timur Kristóf Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 90f72e7..b04e10d 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -403,14 +403,15 @@ emit_split_vector(isel_context* ctx, Temp vec_src, unsigned num_components) /* This vector expansion uses a mask to determine which elements in the new vector * come from the original vector. The other elements are undefined. */ void -expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components, unsigned mask) +expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components, unsigned mask, + bool zero_padding = false) { assert(vec_src.type() == RegType::vgpr); Builder bld(ctx->program, ctx->block); if (dst.type() == RegType::sgpr && num_components > dst.size()) { Temp tmp_dst = bld.tmp(RegClass::get(RegType::vgpr, 2 * num_components)); - expand_vector(ctx, vec_src, tmp_dst, num_components, mask); + expand_vector(ctx, vec_src, tmp_dst, num_components, mask, zero_padding); bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp_dst); ctx->allocated_vec[dst.id()] = ctx->allocated_vec[tmp_dst.id()]; return; @@ -430,24 +431,30 @@ expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components } unsigned component_bytes = dst.bytes() / num_components; - RegClass rc = RegClass::get(RegType::vgpr, component_bytes); - assert(dst.type() == RegType::vgpr || !rc.is_subdword()); + RegClass src_rc = RegClass::get(RegType::vgpr, component_bytes); + RegClass dst_rc = RegClass::get(dst.type(), component_bytes); + assert(dst.type() == RegType::vgpr || !src_rc.is_subdword()); std::array elems; + Temp padding = Temp(0, dst_rc); + if (zero_padding) + padding = bld.copy(bld.def(dst_rc), Operand::zero(component_bytes)); + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)}; vec->definitions[0] = Definition(dst); unsigned k = 0; for (unsigned i = 0; i < num_components; i++) { if (mask & (1 << i)) { - Temp src = emit_extract_vector(ctx, vec_src, k++, rc); + Temp src = emit_extract_vector(ctx, vec_src, k++, src_rc); if (dst.type() == RegType::sgpr) src = bld.as_uniform(src); vec->operands[i] = Operand(src); + elems[i] = src; } else { vec->operands[i] = Operand::zero(component_bytes); + elems[i] = padding; } - elems[i] = vec->operands[i].getTemp(); } ctx->block->instructions.emplace_back(std::move(vec)); ctx->allocated_vec.emplace(dst.id(), elems); @@ -6245,7 +6252,8 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr) Operand::zero()); } - expand_vector(ctx, tmp, dst, instr->dest.ssa.num_components, expand_mask); + expand_vector(ctx, tmp, dst, instr->dest.ssa.num_components, expand_mask, + instr->dest.ssa.bit_size == 64); } void -- 2.7.4