From eaee28b5caec0972aa35479fecbe4196528c8fb8 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 19 Sep 2016 09:11:09 +0000 Subject: [PATCH] ARM: check alignment before transforming ldr -> ldm (or similar). ldm and stm instructions always require 4-byte alignment on the pointer, but we weren't checking this before trying to reduce code-size by replacing a post-indexed load/store with them. Unfortunately, we were also dropping this information in DAG ISel too, but that's easy enough to fix. llvm-svn: 281893 --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 32 +++++++++++++++++++++-------- llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 4 ++++ llvm/test/CodeGen/ARM/thumb2-size-opt.ll | 13 ++++++++++++ 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index cb01b63..977a082 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -193,6 +193,8 @@ public: #include "ARMGenDAGISel.inc" private: + void transferMemOperands(SDNode *Src, SDNode *Dst); + /// Indexed (pre/post inc/dec) load matching code for ARM. 
bool tryARMIndexedLoad(SDNode *N); bool tryT1IndexedLoad(SDNode *N); @@ -1471,6 +1473,12 @@ static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); } +void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(Result)->setMemRefs(MemOp, MemOp + 1); +} + bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { LoadSDNode *LD = cast(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); @@ -1529,16 +1537,20 @@ bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, - MVT::i32, MVT::Other, Ops)); + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, + MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceNode(N, New); return true; } else { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, - MVT::i32, MVT::Other, Ops)); + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, + MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceNode(N, New); return true; } } @@ -1566,8 +1578,10 @@ bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, MVT::i32, - MVT::Other, Ops)); + SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, + MVT::i32, MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceNode(N, New); return true; } @@ 
-1612,8 +1626,10 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, - MVT::Other, Ops)); + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, + MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceNode(N, New); return true; } diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 5a60893..07158f9 100644 --- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -430,6 +430,10 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, if (!MBB.getParent()->getFunction()->optForMinSize()) return false; + if (!MI->hasOneMemOperand() || + (*MI->memoperands_begin())->getAlignment() < 4) + return false; + // We're creating a completely different type of load/store - LDM from LDR. // For this reason we can't reuse the logic at the end of this function; we // have to implement the MI building here. diff --git a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll index aba0276..f8d6489 100644 --- a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll +++ b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll @@ -98,3 +98,16 @@ false: store i32* %next, i32** %addr2 ret i32 %res } + +; ldm instructions fault on misaligned accesses so we mustn't convert +; this post-indexed ldr into one. +define i32* @misaligned_post(i32* %src, i32* %dest) minsize { +; CHECK-LABEL: misaligned_post: +; CHECK: ldr [[VAL:.*]], [r0], #4 +; CHECK: str [[VAL]], [r1] + + %val = load i32, i32* %src, align 1 + store i32 %val, i32* %dest + %next = getelementptr i32, i32* %src, i32 1 + ret i32* %next +} -- 2.7.4