From: Amara Emerson
Date: Sun, 27 Jun 2021 06:36:46 +0000 (-0700)
Subject: [GlobalISel] Add re-association combine for G_PTR_ADD to allow better addressing...
X-Git-Tag: llvmorg-14-init~2330
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0111da2ef80adc516f13d7c7f4125eb9eea6c08f;p=platform%2Fupstream%2Fllvm.git

[GlobalISel] Add re-association combine for G_PTR_ADD to allow better addressing mode usage.

We're trying to match a few pointer computation patterns here for
re-association opportunities.

1) Isolating a constant operand to be on the RHS, e.g.:
   G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)

2) Folding two constants in each sub-tree as long as such folding
   doesn't break a legal addressing mode.
   G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)

AArch64 code size improvements on CTMark with -Os:

Program             before   after    diff
 pairlocalalign     251048   251044   -0.0%
 consumer-typeset   421820   421812   -0.0%
 kc                 431348   431320   -0.0%
 SPASS              413404   413300   -0.0%
 clamscan           384396   384220   -0.0%
 tramp3d-v4         370640   370412   -0.1%
 lencod             432096   431772   -0.1%
 bullet             479400   478796   -0.1%
 sqlite3            288504   288072   -0.1%
 7zip-benchmark     573796   570768   -0.5%
 Geomean difference                   -0.1%

Differential Revision: https://reviews.llvm.org/D105069
---

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 2c8b6b1..6e62ef7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -511,8 +511,14 @@ public:
       SmallVectorImpl> &MatchInfo);
 
   /// Use a function which takes in a MachineIRBuilder to perform a combine.
+  /// By default, it erases the instruction \p MI from the function.
   bool applyBuildFn(MachineInstr &MI,
                     std::function<void(MachineIRBuilder &)> &MatchInfo);
 
+  /// Use a function which takes in a MachineIRBuilder to perform a combine.
+  /// This variant does not erase \p MI after calling the build function.
+  bool applyBuildFnNoErase(MachineInstr &MI,
+                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   bool matchFunnelShiftToRotate(MachineInstr &MI);
   void applyFunnelShiftToRotate(MachineInstr &MI);
   bool matchRotateOutOfRange(MachineInstr &MI);
@@ -528,6 +534,11 @@ public:
   bool matchBitfieldExtractFromAnd(
       MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
 
+  /// Reassociate pointer calculations with G_ADD involved, to allow better
+  /// addressing mode usage.
+  bool matchReassocPtrAdd(MachineInstr &MI,
+                          std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
@@ -589,6 +600,11 @@ private:
       SmallDenseMap &MemOffset2Idx,
      const SmallVector &RegsToVisit,
      const unsigned MemSizeInBits);
+
+  /// Examines the G_PTR_ADD instruction \p PtrAdd and determines if performing
+  /// a re-association of its operands would break an existing legal addressing
+  /// mode that the address computation currently represents.
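+  /// \returns true if re-association should be avoided because a load/store
+  /// user currently folds its offset into a legal addressing mode (per
+  /// isLegalAddressingMode) that the combined constant offset would exceed.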
+  bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd);
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a01d9b2..678855b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -645,6 +645,13 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
 def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
                                             bitfield_extract_from_and]>;
 
+def reassoc_ptradd : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_PTR_ADD):$root,
+    [{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
+def reassocs : GICombineGroup<[reassoc_ptradd]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -678,9 +685,10 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
                                        mul_by_neg_one]>;
 
 def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
-    extract_vec_elt_combines, ptr_add_immed_chain, combines_for_extload,
+    extract_vec_elt_combines, combines_for_extload,
     combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+    reassocs, ptr_add_immed_chain,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index bb02c10..9a71500 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1720,6 +1720,13 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
   if (!MaybeImmVal)
     return false;
 
+  // Don't do this combine if there are multiple uses of the first PTR_ADD,
+  // since we may be able to compute the second PTR_ADD as an immediate
+  // offset anyway. Folding the first offset into the second may cause us
+  // to go beyond the bounds of our legal addressing modes.
+  if (!MRI.hasOneNonDBGUse(Add2))
+    return false;
+
   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
     return false;
@@ -3955,6 +3962,13 @@ bool CombinerHelper::applyBuildFn(
   return true;
 }
 
+bool CombinerHelper::applyBuildFnNoErase(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  Builder.setInstrAndDebugLoc(MI);
+  MatchInfo(Builder);
+  return true;
+}
+
 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
   unsigned Opc = MI.getOpcode();
@@ -4129,6 +4143,129 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
   return true;
 }
 
+bool CombinerHelper::reassociationCanBreakAddressingModePattern(
+    MachineInstr &PtrAdd) {
+  assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
+
+  Register Src1Reg = PtrAdd.getOperand(1).getReg();
+  MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
+  if (!Src1Def)
+    return false;
+
+  Register Src2Reg = PtrAdd.getOperand(2).getReg();
+
+  if (MRI.hasOneNonDBGUse(Src1Reg))
+    return false;
+
+  auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+  if (!C1)
+    return false;
+  auto C2 = getConstantVRegVal(Src2Reg, MRI);
+  if (!C2)
+    return false;
+
+  const APInt &C1APIntVal = *C1;
+  const APInt &C2APIntVal = *C2;
+  const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
+
+  for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+    // This combine may end up running before ptrtoint/inttoptr combines
+    // manage to eliminate redundant conversions, so try to look through them.
+    MachineInstr *ConvUseMI = &UseMI;
+    unsigned ConvUseOpc = ConvUseMI->getOpcode();
+    while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
+           ConvUseOpc == TargetOpcode::G_PTRTOINT) {
+      Register DefReg = ConvUseMI->getOperand(0).getReg();
+      if (!MRI.hasOneNonDBGUse(DefReg))
+        break;
+      ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
+      ConvUseOpc = ConvUseMI->getOpcode();
+    }
+    auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
+                     ConvUseOpc == TargetOpcode::G_STORE;
+    if (!LoadStore)
+      continue;
+    // Is x[offset2] already not a legal addressing mode? If so then
+    // reassociating the constants breaks nothing (we test offset2 because
+    // that's the one we hope to fold into the load or store).
+    TargetLoweringBase::AddrMode AM;
+    AM.HasBaseReg = true;
+    AM.BaseOffs = C2APIntVal.getSExtValue();
+    unsigned AS =
+        MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
+    Type *AccessTy =
+        getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
+                      PtrAdd.getMF()->getFunction().getContext());
+    const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
+    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+                                   AccessTy, AS))
+      continue;
+
+    // Would x[offset1+offset2] still be a legal addressing mode?
+    AM.BaseOffs = CombinedValue;
+    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+                                   AccessTy, AS))
+      return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchReassocPtrAdd(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+  // We're trying to match a few pointer computation patterns here for
+  // re-association opportunities.
+  // 1) Isolating a constant operand to be on the RHS, e.g.:
+  // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+  //
+  // 2) Folding two constants in each sub-tree as long as such folding
+  // doesn't break a legal addressing mode.
+  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+  Register Src1Reg = MI.getOperand(1).getReg();
+  Register Src2Reg = MI.getOperand(2).getReg();
+  MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
+  MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
+
+  if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
+    // Try to match example 1).
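+    // i.e. rewrite G_PTR_ADD(BASE, G_ADD(X, C)) as
+    // G_PTR_ADD(G_PTR_ADD(BASE, X), C), so the constant ends up as the
+    // immediate offset of the outer G_PTR_ADD where it can later be folded
+    // into the addressing mode of a load or store.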
+    if (RHS->getOpcode() != TargetOpcode::G_ADD)
+      return false;
+    auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+    if (!C2)
+      return false;
+
+    MatchInfo = [=, &MI](MachineIRBuilder &B) {
+      LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+      auto NewBase =
+          Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+      Observer.changingInstr(MI);
+      MI.getOperand(1).setReg(NewBase.getReg(0));
+      MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+      Observer.changedInstr(MI);
+    };
+  } else {
+    // Try to match example 2.
+    Register LHSSrc1 = LHS->getOperand(1).getReg();
+    Register LHSSrc2 = LHS->getOperand(2).getReg();
+    auto C1 = getConstantVRegVal(LHSSrc2, MRI);
+    if (!C1)
+      return false;
+    auto C2 = getConstantVRegVal(Src2Reg, MRI);
+    if (!C2)
+      return false;
+
+    MatchInfo = [=, &MI](MachineIRBuilder &B) {
+      auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+      Observer.changingInstr(MI);
+      MI.getOperand(1).setReg(LHSSrc1);
+      MI.getOperand(2).setReg(NewCst.getReg(0));
+      Observer.changedInstr(MI);
+    };
+  }
+  return !reassociationCanBreakAddressingModePattern(MI);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
new file mode 100644
index 0000000..1c5ed0e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
@@ -0,0 +1,153 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+---
+name: test1_noreassoc_legal_already_new_is_illegal
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test1_noreassoc_legal_already_new_is_illegal
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4777
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+    ; CHECK: G_STORE [[C2]](s32), [[PTR_ADD]](p0) :: (store (s32))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_CONSTANT i64 4777
+    %4:_(s64) = G_CONSTANT i64 6
+    %9:_(s32) = G_CONSTANT i32 0
+    %10:_(p0) = G_PTR_ADD %0, %2(s64)
+    %11:_(p0) = G_PTR_ADD %10, %4(s64)
+    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
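+# In the next test both the existing offset (6) and the combined offset
+# (10 + 6 = 16) are expected to be legal addressing-mode immediates for a
+# 32-bit load on AArch64, so the two constants should be folded into a single
+# G_PTR_ADD even though the first G_PTR_ADD has a second use (the store).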
+---
+name: test2_reassoc_already_legal_new_also_legal
+alignment: 4
+liveins:
+  - { reg: '$x0' }
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test2_reassoc_already_legal_new_also_legal
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_CONSTANT i64 10
+    %4:_(s64) = G_CONSTANT i64 6
+    %9:_(s32) = G_CONSTANT i32 0
+    %10:_(p0) = G_PTR_ADD %0, %2(s64)
+    %11:_(p0) = G_PTR_ADD %10, %4(s64)
+    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: test3_noreassoc_only_oneuse
+alignment: 4
+liveins:
+  - { reg: '$x0' }
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test3_noreassoc_only_oneuse
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4783
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %10:_(s64) = G_CONSTANT i64 4783
+    %9:_(p0) = G_PTR_ADD %0, %10(s64)
+    %7:_(s32) = G_LOAD %9(p0) :: (load 4)
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: test4_reassoc_existing_is_already_illegal
+alignment: 4
+liveins:
+  - { reg: '$x0' }
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test4_reassoc_existing_is_already_illegal
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4096
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_CONSTANT i64 17
+    %4:_(s64) = G_CONSTANT i64 4079
+    %9:_(s32) = G_CONSTANT i32 0
+    %10:_(p0) = G_PTR_ADD %0, %2(s64)
+    %11:_(p0) = G_PTR_ADD %10, %4(s64)
+    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
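+# The last test exercises pattern 1): the offset operand of the G_PTR_ADD is
+# itself a G_ADD with a constant RHS, so the combine is expected to rewrite it
+# as G_PTR_ADD(G_PTR_ADD(%x0, %x1), 1), leaving the constant as the outermost
+# offset.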
+---
+name: test5_add_on_rhs
+alignment: 4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test5_add_on_rhs
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 1
+    %3:_(s64) = G_ADD %1, %2
+    %4:_(p0) = G_PTR_ADD %0, %3(s64)
+    %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...