From: Daniel Schürmann
Date: Fri, 18 Sep 2020 16:34:37 +0000 (+0100)
Subject: aco: use v_cvt_pkrtz_f16_f32 for pack_half_2x16
X-Git-Tag: upstream/21.0.0~3726
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=dae1e6f7568dcf6eb536098931478f6b5b4af4b2;p=platform%2Fupstream%2Fmesa.git

aco: use v_cvt_pkrtz_f16_f32 for pack_half_2x16

Apparently, we forgot to remove some debug code.
This patch also fixes the round mode check to
consider the destination bit width.

Totals from 2218 (1.62% of 136546) affected shaders (RAVEN):
SGPRs: 100848 -> 100280 (-0.56%)
VGPRs: 68536 -> 66044 (-3.64%); split: -3.68%, +0.05%
CodeSize: 4882296 -> 4837220 (-0.92%); split: -0.94%, +0.01%
MaxWaves: 18990 -> 19019 (+0.15%); split: +0.19%, -0.04%
Instrs: 938150 -> 930388 (-0.83%); split: -0.83%, +0.00%
Cycles: 8699824 -> 8667648 (-0.37%); split: -0.38%, +0.01%
VMEM: 1144502 -> 1059680 (-7.41%); split: +0.06%, -7.48%
SMEM: 170076 -> 167999 (-1.22%); split: +0.22%, -1.44%
VClause: 18428 -> 18422 (-0.03%)
SClause: 41375 -> 41353 (-0.05%); split: -0.06%, +0.00%
Copies: 60008 -> 60054 (+0.08%); split: -0.31%, +0.39%
PreVGPRs: 56163 -> 56142 (-0.04%)

Reviewed-by: Rhys Perry
Part-of:
---

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 87ff6e4..f26947a 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2615,7 +2615,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          Temp src0 = bld.tmp(v1);
          Temp src1 = bld.tmp(v1);
          bld.pseudo(aco_opcode::p_split_vector, Definition(src0), Definition(src1), src);
-         if (0 && (!ctx->block->fp_mode.care_about_round32 || ctx->block->fp_mode.round32 == fp_round_tz)) {
+         if (!ctx->block->fp_mode.care_about_round16_64 || ctx->block->fp_mode.round16_64 == fp_round_tz) {
             bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src0, src1);
          } else {
             src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src0);