From 712d35b417dd1beb373c51a9076db18811433d59 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 15 Nov 2022 17:49:53 -0800 Subject: [PATCH] GlobalISel: Fold some idempotent operations This makes the existing fabs_fabs fold redundant, which I thought was using more tablegen matching, but apparently not. I'm not sure how to make match work with multiple opcodes. There are a few more this could handle, but these are the ones that legalization is more likely to introduce. --- llvm/include/llvm/Target/GlobalISel/Combine.td | 25 +++--- .../postlegalizer-combiner-fcanonicalize.mir | 96 ++++++++++++++++++++++ .../GlobalISel/postlegalizer-combiner-freeze.mir | 96 ++++++++++++++++++++++ 3 files changed, 206 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index dd5d929..a737b1c 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -131,6 +131,18 @@ def copy_prop : GICombineRule< [{ return Helper.matchCombineCopy(*${mi}); }]), (apply [{ Helper.applyCombineCopy(*${mi}); }])>; +// idempotent operations +// Fold (freeze (freeze x)) -> (freeze x). +// Fold (fabs (fabs x)) -> (fabs x). +// Fold (fcanonicalize (fcanonicalize x)) -> (fcanonicalize x). 
+def idempotent_prop : GICombineRule< + (defs root:$mi), + (match (wip_match_opcode G_FREEZE, G_FABS, G_FCANONICALIZE):$mi, + [{ return MRI.getVRegDef(${mi}->getOperand(1).getReg())->getOpcode() == + ${mi}->getOpcode(); }]), + (apply [{ Helper.replaceSingleDefInstWithOperand(*${mi}, 1); }])>; + + def extending_loads : GICombineRule< (defs root:$root, extending_load_matchdata:$matchinfo), (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD):$root, @@ -565,15 +577,6 @@ def merge_unmerge : GICombineRule< (apply [{ Helper.replaceSingleDefInstWithReg(*${d}, ${matchinfo}); }]) >; -// Fold (fabs (fabs x)) -> (fabs x). -def fabs_fabs_fold: GICombineRule< - (defs root:$dst, register_matchinfo:$matchinfo), - (match (G_FABS $abs, $src), - (G_FABS $dst, $abs):$mi, - [{ ${matchinfo} = ${abs}.getReg(); }]), - (apply [{ return Helper.replaceSingleDefInstWithReg(*${mi}, ${matchinfo}); }]) ->; - // Fold (fabs (fneg x)) -> (fabs x). def fabs_fneg_fold: GICombineRule < (defs root:$root, build_fn_matchinfo:$matchinfo), @@ -1038,7 +1041,7 @@ def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp, select_to_logical]>; def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd, - mul_by_neg_one]>; + mul_by_neg_one, idempotent_prop]>; def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma, combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma, @@ -1055,7 +1058,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, width_reduction_combines, select_combines, known_bits_simplifications, ext_ext_fold, not_cmp_fold, opt_brcond_by_inverting_cond, - unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc, + unmerge_merge, unmerge_cst, unmerge_dead_to_trunc, unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shl, const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine, diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir new file mode 100644 index 0000000..cd3c005 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir @@ -0,0 +1,96 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: test_fcanonicalize_fcanonicalize_s32 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_fcanonicalize_fcanonicalize_s32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (store (s32), addrspace 1) + %src0:_(s32) = COPY $vgpr0 + %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + %fcanonicalize1:_(s32) = G_FCANONICALIZE %fcanonicalize0 + %ptr:_(p1) = G_IMPLICIT_DEF + G_STORE %fcanonicalize1, %ptr :: (store (s32), addrspace 1, align 4) +... 
+ +--- +name: test_fcanonicalize_fcanonicalize_fcanonicalize_s32 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_fcanonicalize_fcanonicalize_fcanonicalize_s32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (store (s32), addrspace 1) + %src0:_(s32) = COPY $vgpr0 + %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + %fcanonicalize1:_(s32) = G_FCANONICALIZE %fcanonicalize0 + %fcanonicalize2:_(s32) = G_FCANONICALIZE %fcanonicalize1 + %ptr:_(p1) = G_IMPLICIT_DEF + G_STORE %fcanonicalize2, %ptr :: (store (s32), addrspace 1, align 4) +... + +--- +name: test_fcanonicalize_fcanonicalize_s32_multi_use +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_fcanonicalize_fcanonicalize_s32_multi_use + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) + %src0:_(s32) = COPY $vgpr0 + %ptr:_(p1) = G_IMPLICIT_DEF + + %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + G_STORE %fcanonicalize0, %ptr :: (volatile store (s32), addrspace 1, align 4) + + %fcanonicalize1:_(s32) = G_FCANONICALIZE %fcanonicalize0 + G_STORE %fcanonicalize1, %ptr :: (volatile store (s32), addrspace 1, align 4) + + G_STORE %fcanonicalize1, %ptr :: (volatile store (s32), addrspace 1, align 4) +... 
+ +--- +name: test_fcanonicalize_copy_fcanonicalize_s32 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_fcanonicalize_copy_fcanonicalize_s32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (store (s32), addrspace 1) + %src0:_(s32) = COPY $vgpr0 + %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0 + %copy:_(s32) = COPY %fcanonicalize0 + %fcanonicalize1:_(s32) = G_FCANONICALIZE %copy + %ptr:_(p1) = G_IMPLICIT_DEF + G_STORE %fcanonicalize1, %ptr :: (store (s32), addrspace 1, align 4) +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir new file mode 100644 index 0000000..9f6d85b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir @@ -0,0 +1,96 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: test_freeze_freeze_s32 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_freeze_s32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0 + ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (store (s32), addrspace 1) + %src0:_(s32) = COPY $vgpr0 + %freeze0:_(s32) = G_FREEZE %src0 + %freeze1:_(s32) = G_FREEZE %freeze0 + %ptr:_(p1) = G_IMPLICIT_DEF + G_STORE %freeze1, %ptr :: (store (s32), addrspace 1, align 4) +... 
+ +--- +name: test_freeze_freeze_freeze_s32 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_freeze_freeze_s32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0 + ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (store (s32), addrspace 1) + %src0:_(s32) = COPY $vgpr0 + %freeze0:_(s32) = G_FREEZE %src0 + %freeze1:_(s32) = G_FREEZE %freeze0 + %freeze2:_(s32) = G_FREEZE %freeze1 + %ptr:_(p1) = G_IMPLICIT_DEF + G_STORE %freeze2, %ptr :: (store (s32), addrspace 1, align 4) +... + +--- +name: test_freeze_freeze_s32_multi_use +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_freeze_freeze_s32_multi_use + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0 + ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1) + %src0:_(s32) = COPY $vgpr0 + %ptr:_(p1) = G_IMPLICIT_DEF + + %freeze0:_(s32) = G_FREEZE %src0 + G_STORE %freeze0, %ptr :: (volatile store (s32), addrspace 1, align 4) + + %freeze1:_(s32) = G_FREEZE %freeze0 + G_STORE %freeze1, %ptr :: (volatile store (s32), addrspace 1, align 4) + + G_STORE %freeze1, %ptr :: (volatile store (s32), addrspace 1, align 4) +... 
+ +--- +name: test_freeze_copy_freeze_s32 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_copy_freeze_s32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0 + ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (store (s32), addrspace 1) + %src0:_(s32) = COPY $vgpr0 + %freeze0:_(s32) = G_FREEZE %src0 + %copy:_(s32) = COPY %freeze0 + %freeze1:_(s32) = G_FREEZE %copy + %ptr:_(p1) = G_IMPLICIT_DEF + G_STORE %freeze1, %ptr :: (store (s32), addrspace 1, align 4) +... -- 2.7.4