From: Rhys Perry
Date: Mon, 5 Apr 2021 13:10:25 +0000 (+0100)
Subject: aco: set TRUNC_COORD=0 for nir_texop_tg4
X-Git-Tag: upstream/21.2.3~4546
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3cbe9894f74e064e7e6136a146aa4793c77fee3e;p=platform%2Fupstream%2Fmesa.git

aco: set TRUNC_COORD=0 for nir_texop_tg4

Fixes black squares in Assassin's Creed: Valhalla and rendering of
FidelityFX-CACAO demo.

fossil-db (sienna cichlid):
Totals from 3052 (2.09% of 146267) affected shaders:
SpillSGPRs: 8437 -> 8646 (+2.48%)
CodeSize: 30993832 -> 31116916 (+0.40%); split: -0.00%, +0.40%
Instrs: 5869934 -> 5886783 (+0.29%); split: -0.00%, +0.29%
Latency: 250330521 -> 250463770 (+0.05%); split: -0.00%, +0.05%
InvThroughput: 59797617 -> 59814584 (+0.03%); split: -0.00%, +0.03%
VClause: 92114 -> 92132 (+0.02%)
SClause: 197373 -> 197338 (-0.02%); split: -0.02%, +0.01%
Copies: 479482 -> 482394 (+0.61%); split: -0.01%, +0.61%
Branches: 219629 -> 219635 (+0.00%)
PreSGPRs: 248970 -> 249366 (+0.16%)

fossil-db (polaris10):
Totals from 3050 (2.06% of 147787) affected shaders:
SGPRs: 282864 -> 282912 (+0.02%); split: -0.01%, +0.02%
VGPRs: 242572 -> 242612 (+0.02%)
SpillSGPRs: 10387 -> 10675 (+2.77%)
CodeSize: 31872460 -> 31996128 (+0.39%)
MaxWaves: 10924 -> 10925 (+0.01%)
Instrs: 6222217 -> 6239072 (+0.27%)
Latency: 317482545 -> 317773685 (+0.09%); split: -0.00%, +0.09%
InvThroughput: 156149624 -> 156242072 (+0.06%); split: -0.00%, +0.06%
VClause: 92295 -> 92254 (-0.04%); split: -0.05%, +0.01%
SClause: 243342 -> 243321 (-0.01%); split: -0.01%, +0.00%
Copies: 678902 -> 681700 (+0.41%); split: -0.00%, +0.41%
Branches: 219698 -> 219703 (+0.00%)
PreSGPRs: 244251 -> 244644 (+0.16%)

Signed-off-by: Rhys Perry
Reviewed-by: Samuel Pitoiset
Reviewed-by: Bas Nieuwenhuizen
Fixes: 58f25098a0d ("radv: Use TRUNC_COORD on samplers")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3110
Part-of:
---

diff --git a/src/amd/compiler/aco_instruction_selection.cpp 
b/src/amd/compiler/aco_instruction_selection.cpp index 510020a..97d80eb 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5437,8 +5437,10 @@ Temp get_sampler_desc(isel_context *ctx, nir_deref_instr *deref_instr, constant_index = 0; const uint32_t *samplers = radv_immutable_samplers(layout, binding); + uint32_t dword0_mask = tex_instr->op == nir_texop_tg4 ? + C_008F30_TRUNC_COORD : 0xffffffffu; return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), - Operand(samplers[constant_index * 4 + 0]), + Operand(samplers[constant_index * 4 + 0] & dword0_mask), Operand(samplers[constant_index * 4 + 1]), Operand(samplers[constant_index * 4 + 2]), Operand(samplers[constant_index * 4 + 3])); @@ -5500,6 +5502,23 @@ Temp get_sampler_desc(isel_context *ctx, nir_deref_instr *deref_instr, res = bld.pseudo(aco_opcode::p_create_vector, bld.def(s8), components[0], components[1], components[2], components[3], components[4], components[5], components[6], components[7]); + } else if (desc_type == ACO_DESC_SAMPLER && tex_instr->op == nir_texop_tg4) { + Temp components[4]; + for (unsigned i = 0; i < 4; i++) + components[i] = bld.tmp(s1); + + bld.pseudo(aco_opcode::p_split_vector, + Definition(components[0]), Definition(components[1]), + Definition(components[2]), Definition(components[3]), res); + + /* We want to always use the linear filtering truncation behaviour for + * nir_texop_tg4, even if the sampler uses nearest/point filtering. + */ + components[0] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), + components[0], Operand((uint32_t)C_008F30_TRUNC_COORD)); + + res = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), + components[0], components[1], components[2], components[3]); } return res;