From 3c36de55f5e60dee8f1bc04bd201f6dd762b3423 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 15 Nov 2022 14:07:07 -0800
Subject: [PATCH] GlobalISel: Try to fold G_SEXT_INREG to G_AND with mask

Copies the same transform from the DAG. Helps eliminate some codegen
diffs to allow shared checks in a future change.

Not sure if apply supports anything better than C++ fragments for the
result. It's also not really reasonable that every combine has to set
the default insertion point.
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h     |   4 +
 llvm/include/llvm/Target/GlobalISel/Combine.td   |  19 +++-
 .../prelegalizer-combiner-sext_inreg-to-and.mir  | 111 +++++++++++++++++++++
 3 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5c54f0e..0f7bd93 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -129,6 +129,10 @@ public:
     return KB;
   }
 
+  MachineIRBuilder &getBuilder() const {
+    return Builder;
+  }
+
   const TargetLowering &getTargetLowering() const;
 
   /// \returns true if the combiner is running pre-legalization.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a737b1c..f65b1ed 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -169,6 +169,22 @@ def sext_inreg_of_load : GICombineRule<
          [{ return Helper.matchSextInRegOfLoad(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>;
 
+def sext_inreg_to_zext_inreg : GICombineRule<
+  (defs root:$dst),
+  (match
+    (G_SEXT_INREG $dst, $src, $imm):$root,
+    [{
+      unsigned BitWidth = MRI.getType(${src}.getReg()).getScalarSizeInBits();
+      return Helper.getKnownBits()->maskedValueIsZero(${src}.getReg(),
+        APInt::getOneBitSet(BitWidth, ${imm}.getImm() - 1)); }]),
+  (apply [{
+    Helper.getBuilder().setInstrAndDebugLoc(*${root});
+    Helper.getBuilder().buildZExtInReg(${dst}, ${src}, ${imm}.getImm());
+    ${root}->eraseFromParent();
+    return true;
+  }])
+>;
+
 def combine_indexed_load_store : GICombineRule<
   (defs root:$root, indexed_load_store_matchdata:$matchinfo),
   (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
@@ -1030,7 +1046,8 @@ def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
-  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits]>;
+  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+  sext_inreg_to_zext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
                                                narrow_binop_feeding_and]>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir
new file mode 100644
index 0000000..c6e7853
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir
@@ -0,0 +1,111 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: sext_inreg_i32_8_and_neg255
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_i32_8_and_neg255
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: %inreg:_(s32) = G_AND %load, [[C]]
+    ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+    %ptr:_(p1) = COPY $vgpr0_vgpr1
+    %load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4)
+    %mask:_(s32) = G_CONSTANT i32 -255
+    %and:_(s32) = G_AND %load, %mask
+    %inreg:_(s32) = G_SEXT_INREG %and, 8
+    $vgpr0 = COPY %inreg
+
+...
+
+---
+name: sext_inreg_i32_8_and_255
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_i32_8_and_255
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1)
+    ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: %and:_(s32) = G_AND %load, %mask
+    ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %and, 8
+    ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+    %ptr:_(p1) = COPY $vgpr0_vgpr1
+    %load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4)
+    %mask:_(s32) = G_CONSTANT i32 255
+    %and:_(s32) = G_AND %load, %mask
+    %inreg:_(s32) = G_SEXT_INREG %and, 8
+    $vgpr0 = COPY %inreg
+
+...
+
+---
+name: sext_inreg_v2i32_8_and_neg255
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_v2i32_8_and_neg255
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1)
+    ; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 -255
+    ; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32)
+    ; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: %inreg:_(<2 x s32>) = G_AND %and, [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>)
+    %ptr:_(p1) = COPY $vgpr0_vgpr1
+    %load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8)
+    %mask_elt:_(s32) = G_CONSTANT i32 -255
+    %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt
+    %and:_(<2 x s32>) = G_AND %load, %mask
+    %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
+    $vgpr0_vgpr1 = COPY %inreg
+
+...
+
+---
+name: sext_inreg_v2i32_8_and_255
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_v2i32_8_and_255
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1)
+    ; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32)
+    ; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask
+    ; CHECK-NEXT: %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>)
+    %ptr:_(p1) = COPY $vgpr0_vgpr1
+    %load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8)
+    %mask_elt:_(s32) = G_CONSTANT i32 255
+    %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt
+    %and:_(<2 x s32>) = G_AND %load, %mask
+    %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
+    $vgpr0_vgpr1 = COPY %inreg
+
+...
-- 
2.7.4
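Illustration (not part of the patch): the fold performed by the new sext_inreg_to_zext_inreg rule can be sketched in MIR roughly as below, following the first test case above; %val and %zext_mask are placeholder names, and the snippet is a hand-written sketch rather than combiner output.

  ; Before: the mask -255 (0xffffff01) clears bits 1-7, so bit 7, the sign
  ; bit of the 8-bit subrange, is known to be zero.
  %mask:_(s32) = G_CONSTANT i32 -255
  %and:_(s32) = G_AND %val, %mask
  %inreg:_(s32) = G_SEXT_INREG %and, 8

  ; After: with the sign bit known zero, sign-extension equals zero-extension,
  ; so the G_SEXT_INREG is rewritten as an AND with the low-bit mask 255.
  ; Folding the two masks (0xffffff01 & 0xff) then leaves the single
  ; G_AND with constant 1 seen in the first test's CHECK lines.
  %zext_mask:_(s32) = G_CONSTANT i32 255
  %inreg:_(s32) = G_AND %and, %zext_mask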