SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
/// Use a function which takes in a MachineIRBuilder to perform a combine.
+ /// By default, it erases the instruction \p MI from the function.
bool applyBuildFn(MachineInstr &MI,
std::function<void(MachineIRBuilder &)> &MatchInfo);
+ /// Use a function which takes in a MachineIRBuilder to perform a combine.
+ /// This variant does not erase \p MI after calling the build function.
+ bool applyBuildFnNoErase(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
bool matchFunnelShiftToRotate(MachineInstr &MI);
void applyFunnelShiftToRotate(MachineInstr &MI);
bool matchRotateOutOfRange(MachineInstr &MI);
bool matchBitfieldExtractFromAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+ /// Reassociate pointer calculations with G_ADD involved, to allow better
+ /// addressing mode usage.
+ bool matchReassocPtrAdd(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
const SmallVector<Register, 8> &RegsToVisit,
const unsigned MemSizeInBits);
+
+ /// Examines the G_PTR_ADD instruction \p PtrAdd and determines if performing
+ /// a re-association of its operands would break an existing legal addressing
+ /// mode that the address computation currently represents.
+ bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd);
};
} // namespace llvm
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
bitfield_extract_from_and]>;
+// Reassociate G_PTR_ADD chains so constant offsets end up together, letting
+// them fold into one immediate — unless that would break a legal addressing
+// mode (checked by matchReassocPtrAdd). The apply uses the no-erase variant
+// because the build function updates the root instruction in place.
+def reassoc_ptradd : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_PTR_ADD):$root,
+ [{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
+// Group for all re-association style combines.
+def reassocs : GICombineGroup<[reassoc_ptradd]>;
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
mul_by_neg_one]>;
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
- extract_vec_elt_combines, ptr_add_immed_chain, combines_for_extload,
+ extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+ reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
known_bits_simplifications, ext_ext_fold,
if (!MaybeImmVal)
return false;
+ // Don't do this combine if there are multiple uses of the first PTR_ADD,
+ // since we may be able to compute the second PTR_ADD as an immediate
+ // offset anyway. Folding the first offset into the second may cause us
+ // to go beyond the bounds of our legal addressing modes.
+ if (!MRI.hasOneNonDBGUse(Add2))
+ return false;
+
MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
return true;
}
+bool CombinerHelper::applyBuildFnNoErase(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ // Position the builder at \p MI (inheriting its debug location) so the
+ // build function inserts new instructions before it, then run the build
+ // function. Unlike applyBuildFn (documented to erase \p MI by default),
+ // \p MI is left in place so MatchInfo can modify it in place.
+ Builder.setInstrAndDebugLoc(MI);
+ MatchInfo(Builder);
+ return true;
+}
+
/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
return true;
}
+/// Returns true if re-associating \p PtrAdd's constant offsets with its inner
+/// G_PTR_ADD would break an addressing mode that the target currently
+/// considers legal for some user of the inner pointer.
+bool CombinerHelper::reassociationCanBreakAddressingModePattern(
+    MachineInstr &PtrAdd) {
+  assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
+
+  // Only interesting when the base of \p PtrAdd is itself a G_PTR_ADD.
+  Register Src1Reg = PtrAdd.getOperand(1).getReg();
+  MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
+  if (!Src1Def)
+    return false;
+
+  Register Src2Reg = PtrAdd.getOperand(2).getReg();
+
+  // With a single use of the inner add there is no other addressing-mode
+  // user that the fold could disturb.
+  if (MRI.hasOneNonDBGUse(Src1Reg))
+    return false;
+
+  // Both offsets must be known constants to reason about the combined value.
+  auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+  if (!C1)
+    return false;
+  auto C2 = getConstantVRegVal(Src2Reg, MRI);
+  if (!C2)
+    return false;
+
+  const APInt &C1APIntVal = *C1;
+  const APInt &C2APIntVal = *C2;
+  const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
+
+  for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+    // This combine may end up running before ptrtoint/inttoptr combines
+    // manage to eliminate redundant conversions, so try to look through them.
+    MachineInstr *ConvUseMI = &UseMI;
+    unsigned ConvUseOpc = ConvUseMI->getOpcode();
+    while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
+           ConvUseOpc == TargetOpcode::G_PTRTOINT) {
+      Register DefReg = ConvUseMI->getOperand(0).getReg();
+      if (!MRI.hasOneNonDBGUse(DefReg))
+        break;
+      ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
+      // Refresh the opcode after stepping to the conversion's user; with a
+      // stale opcode the load/store check below could never fire after
+      // looking through a conversion, and the walk would keep advancing
+      // through unrelated single-use instructions.
+      ConvUseOpc = ConvUseMI->getOpcode();
+    }
+    auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
+                     ConvUseOpc == TargetOpcode::G_STORE;
+    if (!LoadStore)
+      continue;
+    // Is x[offset2] already not a legal addressing mode? If so then
+    // reassociating the constants breaks nothing (we test offset2 because
+    // that's the one we hope to fold into the load or store).
+    TargetLoweringBase::AddrMode AM;
+    AM.HasBaseReg = true;
+    AM.BaseOffs = C2APIntVal.getSExtValue();
+    unsigned AS =
+        MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
+    Type *AccessTy =
+        getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
+                      PtrAdd.getMF()->getFunction().getContext());
+    const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
+    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+                                   AccessTy, AS))
+      continue;
+
+    // Would x[offset1+offset2] still be a legal addressing mode?
+    AM.BaseOffs = CombinedValue;
+    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+                                   AccessTy, AS))
+      return true;
+  }
+
+  return false;
+}
+
+/// Match pointer computations that can be re-associated for better
+/// addressing-mode usage. On success, \p MatchInfo is set to a build
+/// function that rewrites \p MI in place (use with applyBuildFnNoErase).
+bool CombinerHelper::matchReassocPtrAdd(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+  // We're trying to match a few pointer computation patterns here for
+  // re-association opportunities.
+  // 1) Isolating a constant operand to be on the RHS, e.g.:
+  // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+  //
+  // 2) Folding two constants in each sub-tree as long as such folding
+  // doesn't break a legal addressing mode.
+  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+  Register Src1Reg = MI.getOperand(1).getReg();
+  Register Src2Reg = MI.getOperand(2).getReg();
+  MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
+  MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
+
+  if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
+    // Try to match example 1).
+    if (RHS->getOpcode() != TargetOpcode::G_ADD)
+      return false;
+    // The G_ADD's RHS must be constant for the isolation to be worthwhile.
+    auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+    if (!C2)
+      return false;
+
+    MatchInfo = [=, &MI](MachineIRBuilder &B) {
+      LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+      // Use the builder passed to the build function (\p B), not the member
+      // Builder, so callers may invoke MatchInfo with any builder.
+      auto NewBase =
+          B.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+      Observer.changingInstr(MI);
+      MI.getOperand(1).setReg(NewBase.getReg(0));
+      MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+      Observer.changedInstr(MI);
+    };
+  } else {
+    // Try to match example 2.
+    Register LHSSrc1 = LHS->getOperand(1).getReg();
+    Register LHSSrc2 = LHS->getOperand(2).getReg();
+    auto C1 = getConstantVRegVal(LHSSrc2, MRI);
+    if (!C1)
+      return false;
+    auto C2 = getConstantVRegVal(Src2Reg, MRI);
+    if (!C2)
+      return false;
+
+    MatchInfo = [=, &MI](MachineIRBuilder &B) {
+      // Fold the two constant offsets and re-point at the inner base.
+      auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+      Observer.changingInstr(MI);
+      MI.getOperand(1).setReg(LHSSrc1);
+      MI.getOperand(2).setReg(NewCst.getReg(0));
+      Observer.changedInstr(MI);
+    };
+  }
+  // Reject the transform if folding would break a currently-legal addressing
+  // mode for an existing user of the inner pointer.
+  return !reassociationCanBreakAddressingModePattern(MI);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+---
+name: test1_noreassoc_legal_already_new_is_illegal
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: test1_noreassoc_legal_already_new_is_illegal
+ ; CHECK: liveins: $x0
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4777
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+ ; CHECK: G_STORE [[C2]](s32), [[PTR_ADD]](p0) :: (store (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %2:_(s64) = G_CONSTANT i64 4777
+ %4:_(s64) = G_CONSTANT i64 6
+ %9:_(s32) = G_CONSTANT i32 0
+ %10:_(p0) = G_PTR_ADD %0, %2(s64)
+ %11:_(p0) = G_PTR_ADD %10, %4(s64)
+ %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+ G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test2_reassoc_already_legal_new_also_legal
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: test2_reassoc_already_legal_new_also_legal
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+ ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %2:_(s64) = G_CONSTANT i64 10
+ %4:_(s64) = G_CONSTANT i64 6
+ %9:_(s32) = G_CONSTANT i32 0
+ %10:_(p0) = G_PTR_ADD %0, %2(s64)
+ %11:_(p0) = G_PTR_ADD %10, %4(s64)
+ %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+ G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test3_noreassoc_only_oneuse
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: test3_noreassoc_only_oneuse
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4783
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %10:_(s64) = G_CONSTANT i64 4783
+ %9:_(p0) = G_PTR_ADD %0, %10(s64)
+ %7:_(s32) = G_LOAD %9(p0) :: (load 4)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test4_reassoc_existing_is_already_illegal
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: test4_reassoc_existing_is_already_illegal
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4096
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+ ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %2:_(s64) = G_CONSTANT i64 17
+ %4:_(s64) = G_CONSTANT i64 4079
+ %9:_(s32) = G_CONSTANT i32 0
+ %10:_(p0) = G_PTR_ADD %0, %2(s64)
+ %11:_(p0) = G_PTR_ADD %10, %4(s64)
+ %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+ G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test5_add_on_rhs
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: test5_add_on_rhs
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(s64) = G_ADD %1, %2
+ %4:_(p0) = G_PTR_ADD %0, %3(s64)
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...