From 364ad2815b88fbf49141bf9fe871ea23c1020aba Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 14 Apr 2023 17:49:46 +0100 Subject: [PATCH] aco: implement texture samples with strict WQM coordinates Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 37 ++++++++++++++++++++++++-- src/amd/compiler/aco_instruction_selection.h | 1 + 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 1a9a296..fb6abaa 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5933,7 +5933,14 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v size_t nsa_size = bld.program->dev.max_nsa_vgprs; nsa_size = bld.program->gfx_level >= GFX11 || coords.size() <= nsa_size ? nsa_size : 0; + const bool strict_wqm = coords[0].regClass().is_linear_vgpr(); + if (strict_wqm) + nsa_size = coords.size(); + for (unsigned i = 0; i < std::min(coords.size(), nsa_size); i++) { + if (!coords[i].id()) + continue; + coords[i] = as_vgpr(bld, coords[i]); } @@ -5973,6 +5980,7 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v mimg->operands[2] = vdata; for (unsigned i = 0; i < coords.size(); i++) mimg->operands[3 + i] = Operand(coords[i]); + mimg->strict_wqm = strict_wqm; MIMG_instruction* res = mimg.get(); bld.insert(std::move(mimg)); @@ -9102,10 +9110,10 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) Builder bld(ctx->program, ctx->block); bool has_bias = false, has_lod = false, level_zero = false, has_compare = false, has_offset = false, has_ddx = false, has_ddy = false, has_derivs = false, - has_sample_index = false, has_clamped_lod = false; + has_sample_index = false, has_clamped_lod = false, has_wqm_coord = false; Temp resource, sampler, bias = Temp(), compare = Temp(), sample_index = Temp(), lod = Temp(), offset = Temp(), ddx = Temp(), ddy = Temp(), clamped_lod = Temp(), - coord = Temp(); + coord = Temp(), wqm_coord = Temp(); std::vector coords; std::vector derivs; nir_const_value* const_offset[4] = {NULL, NULL, NULL, NULL}; @@ -9144,6 +9152,12 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) coord = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, a16); break; } + case nir_tex_src_backend1: { + assert(instr->src[i].src.ssa->bit_size == 32); + wqm_coord = get_ssa_temp(ctx, instr->src[i].src.ssa); + has_wqm_coord = true; + break; + } case nir_tex_src_bias: assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32)); /* Doesn't need get_ssa_temp_tex because we pack it into its own dword anyway. */ @@ -9173,6 +9187,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) } break; case nir_tex_src_offset: + case nir_tex_src_backend2: assert(instr->src[i].src.ssa->bit_size == 32); offset = get_ssa_temp(ctx, instr->src[i].src.ssa); get_const_vec(instr->src[i].src.ssa, const_offset); @@ -9199,6 +9214,12 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) } } + if (has_wqm_coord) { + assert(instr->op == nir_texop_tex || instr->op == nir_texop_txb || instr->op == nir_texop_lod); + assert(wqm_coord.regClass().is_linear_vgpr()); + assert(!a16 && !g16); + } + if (instr->op == nir_texop_tg4 && !has_lod && !instr->is_gather_implicit_lod) level_zero = true; @@ -9467,6 +9488,11 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) /* gather MIMG address components */ std::vector args; + if (has_wqm_coord) { + args.emplace_back(wqm_coord); + if (!(ctx->block->kind & block_kind_top_level)) + ctx->unended_linear_vgprs.push_back(wqm_coord); + } if (has_offset) args.emplace_back(offset); if (has_bias) @@ -10043,6 +10069,13 @@ visit_jump(isel_context* ctx, nir_jump_instr* instr) void visit_block(isel_context* ctx, nir_block* block) { + if (ctx->block->kind & block_kind_top_level) { + Builder bld(ctx->program, ctx->block); + for (Temp tmp : ctx->unended_linear_vgprs) + bld.pseudo(aco_opcode::p_end_linear_vgpr, tmp); + ctx->unended_linear_vgprs.clear(); + } + ctx->block->instructions.reserve(ctx->block->instructions.size() + exec_list_length(&block->instr_list) * 2); nir_foreach_instr (instr, block) { diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h index 771c608..f1b1b04 100644 --- a/src/amd/compiler/aco_instruction_selection.h +++ b/src/amd/compiler/aco_instruction_selection.h @@ -62,6 +62,7 @@ struct isel_context { Block* block; uint32_t first_temp_id; std::unordered_map> allocated_vec; + std::vector unended_linear_vgprs; Stage stage; struct { bool has_branch; -- 2.7.4