From 2e895f8b0496f9f15359a5d98ef722d3d9753bc4 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Daniel=20Sch=C3=BCrmann?=
Date: Thu, 7 Oct 2021 20:20:23 +0200
Subject: [PATCH] radv: vectorize nir_op_fabs

Totals from 4 (0.00% of 134913) affected shaders: (GFX10.3)
CodeSize: 37868 -> 36576 (-3.41%)
Instrs: 5332 -> 5169 (-3.06%)
Latency: 24452 -> 24174 (-1.14%)
InvThroughput: 9784 -> 9462 (-3.29%)
VClause: 54 -> 50 (-7.41%)
Copies: 520 -> 519 (-0.19%)
PreVGPRs: 266 -> 264 (-0.75%)

Reviewed-by: Georg Lehmann
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 10 ++++++++++
 src/amd/vulkan/radv_pipeline.c                 |  1 +
 2 files changed, 11 insertions(+)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index e0a8a7b..34e82a1 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2522,6 +2522,16 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       break;
    }
    case nir_op_fabs: {
+      if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
+         Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
+         Instruction* vop3p =
+            bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src,
+                      instr->src[0].swizzle[0] & 1 ? 3 : 0, instr->src[0].swizzle[1] & 1 ? 3 : 0)
+               .instr;
+         vop3p->vop3p().neg_lo[1] = true;
+         vop3p->vop3p().neg_hi[1] = true;
+         break;
+      }
       Temp src = get_alu_src(ctx, instr->src[0]);
       if (dst.regClass() == v2b) {
          Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst),
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index ebe72c1..f0ef432 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -4072,6 +4072,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
    case nir_op_ffma:
    case nir_op_fdiv:
    case nir_op_flrp:
+   case nir_op_fabs:
    case nir_op_fneg:
    case nir_op_fsat:
    case nir_op_fmin:
-- 
2.7.4