#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
using namespace llvm;
+// Command-line overrides for the bank-conflict hazard recognizer. When either
+// option appears on the command line it takes precedence over the per-CPU
+// value handed to the ARMBankConflictHazardRecognizer constructor.
+static cl::opt<int> DataBankMask(
+    "arm-data-bank-mask", cl::init(-1), cl::Hidden,
+    cl::desc("Mask of offset bits that distinguish data memory banks when "
+             "checking loads for bank conflicts (overrides the CPU default)"));
+static cl::opt<bool> AssumeITCMConflict(
+    "arm-assume-itcm-bankconflict", cl::init(false), cl::Hidden,
+    cl::desc("Assume that two constant-pool loads always bank conflict "
+             "(overrides the CPU default)"));
+
static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
const TargetRegisterInfo &TRI) {
// FIXME: Detect integer instructions properly.
// Reverse hazard checking is not implemented for this recognizer; reaching
// this hook is reported through llvm_unreachable.
void ARMHazardRecognizerFPMLx::RecedeCycle() {
llvm_unreachable("reverse ARM hazard checking unsupported");
}
+
+///////// Bank conflicts handled as hazards //////////////
+
+/// Find the base-address operand and the immediate displacement of a memory
+/// instruction, using the addressing mode and index mode stored in its
+/// TSFlags.
+///
+/// \param MI      instruction to inspect.
+/// \param BaseOp  [out] receives a pointer to the base-address operand.
+/// \param Offset  [out] receives the immediate displacement; 0 when the index
+///                mode is IndexModePost.
+/// \returns true if the addressing mode is one of those handled below and the
+///          displacement is an immediate, false otherwise. For the Thumb1
+///          modes \p BaseOp and \p Offset are written even when false is
+///          returned; for unhandled modes both are left untouched.
+static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp,
+                          int64_t &Offset) {
+  const uint64_t Flags = MI.getDesc().TSFlags;
+  const unsigned Mode = (Flags & ARMII::AddrModeMask);
+  const unsigned Indexing =
+      (Flags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+
+  // Displacement of a T2 access whose plain (non-indexed) form keeps its
+  // immediate in operand ImmIdx. Pre-indexed/updating forms read it from the
+  // following operand (presumably shifted by the write-back result -- confirm
+  // against the instruction definitions).
+  auto T2Displacement = [&](unsigned ImmIdx) -> int64_t {
+    if (Indexing == ARMII::IndexModePost)
+      return 0;
+    if (Indexing == ARMII::IndexModePre || Indexing == ARMII::IndexModeUpd)
+      return MI.getOperand(ImmIdx + 1).getImm();
+    return MI.getOperand(ImmIdx).getImm();
+  };
+
+  // The address mode describes the operand layout of T2 instructions (but not
+  // the access size); for T1 instructions it describes the size (but not the
+  // operand layout).
+  switch (Mode) {
+  case ARMII::AddrModeT2_i8:
+    // t2LDRBT, t2LDRB_POST, t2LDRB_PRE, t2LDRBi8,
+    // t2LDRHT, t2LDRH_POST, t2LDRH_PRE, t2LDRHi8,
+    // t2LDRSBT, t2LDRSB_POST, t2LDRSB_PRE, t2LDRSBi8,
+    // t2LDRSHT, t2LDRSH_POST, t2LDRSH_PRE, t2LDRSHi8,
+    // t2LDRT, t2LDR_POST, t2LDR_PRE, t2LDRi8
+    BaseOp = &MI.getOperand(1);
+    Offset = T2Displacement(2);
+    return true;
+
+  case ARMII::AddrModeT2_i12:
+    // t2LDRBi12, t2LDRHi12
+    // t2LDRSBi12, t2LDRSHi12
+    // t2LDRi12
+    BaseOp = &MI.getOperand(1);
+    Offset = MI.getOperand(2).getImm();
+    return true;
+
+  case ARMII::AddrModeT2_i8s4:
+    // t2LDRD_POST, t2LDRD_PRE, t2LDRDi8
+    BaseOp = &MI.getOperand(2);
+    Offset = T2Displacement(3);
+    return true;
+
+  case ARMII::AddrModeT1_1: // tLDRBi, tLDRBr (watch out!), TLDRSB
+  case ARMII::AddrModeT1_2: // tLDRHi, tLDRHr (watch out!), TLDRSH
+  case ARMII::AddrModeT1_4: // tLDRi, tLDRr (watch out!)
+  {
+    // The register-offset variants share these modes; they are reported as
+    // unhandled because their second address operand is not an immediate.
+    BaseOp = &MI.getOperand(1);
+    const MachineOperand &Disp = MI.getOperand(2);
+    const bool HasImmediate = Disp.isImm();
+    Offset = HasImmediate ? Disp.getImm() : 0;
+    return HasImmediate;
+  }
+
+  default:
+    break;
+  }
+  return false;
+}
+
+/// Construct a bank-conflict recognizer for the function scheduled by \p DAG.
+///
+/// \param DAG                    scheduling DAG; its MachineFunction and that
+///                               function's DataLayout are captured.
+/// \param CPUBankMask            bank mask to use unless -arm-data-bank-mask
+///                               was given on the command line.
+/// \param CPUAssumeITCMConflict  constant-pool conflict assumption to use
+///                               unless -arm-assume-itcm-bankconflict was
+///                               given on the command line.
+ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer(
+    const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict)
+    : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()),
+      DataMask(DataBankMask.getNumOccurrences() > 0
+                   ? static_cast<int64_t>(DataBankMask)
+                   : CPUBankMask),
+      AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences() > 0
+                                 ? static_cast<bool>(AssumeITCMConflict)
+                                 : CPUAssumeITCMConflict) {
+  // NOTE(review): presumably a non-zero look-ahead is what marks this
+  // recognizer as active -- confirm against ScheduleHazardRecognizer.
+  MaxLookAhead = 1;
+}
+
+/// Compare two access offsets under the configured bank mask.
+///
+/// \returns NoHazard when \p O0 and \p O1 differ in at least one bit selected
+///          by DataMask, Hazard when they agree in all of those bits.
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::CheckOffsets(unsigned O0, unsigned O1) {
+  const bool DiffersUnderMask = ((O0 ^ O1) & DataMask) != 0;
+  if (DiffersUnderMask)
+    return NoHazard;
+  return Hazard;
+}
+
+/// Decide whether issuing \p SU now would bank conflict with an access
+/// already recorded in Accesses.
+///
+/// Only instructions that may load, may not store, carry exactly one memory
+/// operand and access at most 4 bytes are considered; anything else yields
+/// NoHazard. Each recorded access is then compared against the candidate in
+/// this order:
+///   1. both memory operands have IR values that strip to the same base
+///      pointer                          -> compare the constant offsets;
+///   2. both are fixed-stack pseudo values -> compare the frame object offsets;
+///   3. both are constant-pool pseudo values and AssumeITCMBankConflict is set
+///                                         -> Hazard;
+///   4. both instructions address memory relative to SP with an immediate
+///      displacement                     -> compare the displacements.
+/// The first recorded access that matches one of these categories decides the
+/// result; if none matches, NoHazard is returned.
+///
+/// \param SU      scheduling unit being considered for issue.
+/// \param Stalls  unused.
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+  MachineInstr &L0 = *SU->getInstr();
+  if (!L0.mayLoad() || L0.mayStore() || L0.getNumMemOperands() != 1)
+    return NoHazard;
+
+  auto MO0 = *L0.memoperands().begin();
+  auto BaseVal0 = MO0->getValue();
+  auto BasePseudoVal0 = MO0->getPseudoValue();
+  int64_t Offset0 = 0;
+
+  if (MO0->getSize() > 4)
+    return NoHazard;
+
+  // SP-relative information about L0 is computed lazily, at most once.
+  bool SPvalid = false;
+  const MachineOperand *SP = nullptr;
+  int64_t SPOffset0 = 0;
+
+  for (auto L1 : Accesses) {
+    auto MO1 = *L1->memoperands().begin();
+    auto BaseVal1 = MO1->getValue();
+    auto BasePseudoVal1 = MO1->getPseudoValue();
+    int64_t Offset1 = 0;
+
+    // Pointers to the same object
+    if (BaseVal0 && BaseVal1) {
+      const Value *Ptr0, *Ptr1;
+      Ptr0 = GetPointerBaseWithConstantOffset(BaseVal0, Offset0, DL, true);
+      Ptr1 = GetPointerBaseWithConstantOffset(BaseVal1, Offset1, DL, true);
+      if (Ptr0 == Ptr1 && Ptr0)
+        return CheckOffsets(Offset0, Offset1);
+    }
+
+    if (BasePseudoVal0 && BasePseudoVal1 &&
+        BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+        BasePseudoVal0->kind() == PseudoSourceValue::FixedStack) {
+      // Spills/fills
+      auto FS0 = cast<FixedStackPseudoSourceValue>(BasePseudoVal0);
+      auto FS1 = cast<FixedStackPseudoSourceValue>(BasePseudoVal1);
+      Offset0 = MF.getFrameInfo().getObjectOffset(FS0->getFrameIndex());
+      Offset1 = MF.getFrameInfo().getObjectOffset(FS1->getFrameIndex());
+      return CheckOffsets(Offset0, Offset1);
+    }
+
+    // Constant pools (likely in ITCM)
+    if (BasePseudoVal0 && BasePseudoVal1 &&
+        BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+        BasePseudoVal0->isConstantPool() && AssumeITCMBankConflict)
+      return Hazard;
+
+    // Is this a stack pointer-relative access?  We could in general try to
+    // use "is this the same register and is it unchanged?", but the
+    // memory operand tracking is highly likely to have already found that.
+    // What we're after here is bank conflicts between different objects in
+    // the stack frame.
+    if (!SPvalid) { // set up SP
+      // The base operand is only asked for its register once it is known to
+      // be a register operand; any other operand kind (e.g. a frame index) is
+      // treated as "not SP-relative".
+      if (!getBaseOffset(L0, SP, SPOffset0) || !SP->isReg() ||
+          SP->getReg().id() != ARM::SP)
+        SP = nullptr;
+      SPvalid = true;
+    }
+    if (SP) {
+      int64_t SPOffset1 = 0;
+      const MachineOperand *SP1 = nullptr;
+      if (getBaseOffset(*L1, SP1, SPOffset1) && SP1->isReg() &&
+          SP1->getReg().id() == ARM::SP)
+        return CheckOffsets(SPOffset0, SPOffset1);
+    }
+  }
+
+  return NoHazard;
+}
+
+/// Drop every access recorded so far.
+void ARMBankConflictHazardRecognizer::Reset() {
+  Accesses.clear();
+}
+
+/// Record \p SU's instruction in Accesses if it is a candidate for bank
+/// conflict checking: it may load, may not store, has exactly one memory
+/// operand, and that operand's size is at most 4. Anything else is ignored.
+void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) {
+  MachineInstr &Inst = *SU->getInstr();
+
+  const bool LoadOnly = Inst.mayLoad() && !Inst.mayStore();
+  if (!LoadOnly || Inst.getNumMemOperands() != 1)
+    return;
+
+  auto MemOp = *Inst.memoperands().begin();
+  const uint64_t AccessSize = MemOp->getSize();
+  if (AccessSize <= 4)
+    Accesses.push_back(&Inst);
+}
+
+/// Moving to the next cycle discards the accesses recorded for this one.
+void ARMBankConflictHazardRecognizer::AdvanceCycle() {
+  Accesses.clear();
+}
+
+/// Stepping back a cycle likewise discards the recorded accesses.
+void ARMBankConflictHazardRecognizer::RecedeCycle() {
+  Accesses.clear();
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -arm-data-bank-mask=-1 | FileCheck %s --check-prefix=NOBANK
+
+; This tests the Cortex-M7 bank conflict hazard recognizer.
+; Normally both loads would be scheduled early (both in the first cycle) due to
+; their latency, but they would bank conflict in TCM, so they are scheduled in
+; different cycles.
+
+define i32 @test(i32* %x0, i32 %y, i32 %z) {
+; CHECK-LABEL: test:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldr r3, [r0]
+; CHECK-NEXT: subs r1, r3, r1
+; CHECK-NEXT: ldr r0, [r0, #8]
+; CHECK-NEXT: subs r1, r1, r2
+; CHECK-NEXT: adds r1, #1
+; CHECK-NEXT: muls r0, r1, r0
+; CHECK-NEXT: bx lr
+; NOBANK-LABEL: test:
+; NOBANK: @ %bb.0: @ %entry
+; NOBANK-NEXT: ldr r3, [r0]
+; NOBANK-NEXT: ldr r0, [r0, #8]
+; NOBANK-NEXT: subs r1, r3, r1
+; NOBANK-NEXT: subs r1, r1, r2
+; NOBANK-NEXT: adds r1, #1
+; NOBANK-NEXT: muls r0, r1, r0
+; NOBANK-NEXT: bx lr
+entry: ; the default run expects a subs between the two ldr; with -arm-data-bank-mask=-1 they are back to back
+ %0 = load i32, i32* %x0, align 4 ; first word load, from %x0 itself
+ %mul3 = add nsw i32 %0, 1
+ %mul = sub nsw i32 %mul3, %y
+ %sub = sub nsw i32 %mul, %z
+ %arrayidx1 = getelementptr inbounds i32, i32* %x0, i32 2 ; element 2 of %x0, i.e. the "[r0, #8]" access in the expected output
+ %1 = load i32, i32* %arrayidx1, align 4 ; second word load, independent of the arithmetic above
+ %mul2 = mul nsw i32 %sub, %1
+ ret i32 %mul2
+}