From 2581905f8149fa79e3ca75e33f8a08d4d6a23546 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 8 Feb 2013 21:35:47 +0000 Subject: [PATCH] DAGCombiner: Constant folding around pre-increment loads/stores Previously, even when a pre-increment load or store was generated, we often needed to keep a copy of the original base register for use with other offsets. If all of these offsets are constants (including the offset which was combined into the addressing mode), then this is clearly unnecessary. This change adjusts these other offsets to use the new incremented address. llvm-svn: 174746 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 89 +++++++++++++++++++++++++++ llvm/test/CodeGen/PowerPC/stdux-constuse.ll | 47 ++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/stdux-constuse.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d694bc7..472919c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6917,6 +6917,16 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { ISD::MemIndexedMode AM = ISD::UNINDEXED; if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) return false; + + // Backends without true r+i pre-indexed forms may need to pass a + // constant base with a variable offset so that constant coercion + // will work with the patterns in canonical form. + bool Swapped = false; + if (isa(BasePtr)) { + std::swap(BasePtr, Offset); + Swapped = true; + } + // Don't create a indexed load / store with zero offset. if (isa(Offset) && cast(Offset)->isNullValue()) @@ -6942,6 +6952,48 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; } + // If the offset is a constant, there may be other adds of constants that + // can be folded with this one. We should do this to avoid having to keep + // a copy of the original base pointer. + SmallVector OtherUses; + if (isa(Offset)) + for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(), + E = BasePtr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == Ptr.getNode()) + continue; + + if (Use->isPredecessorOf(N)) + continue; + + if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) { + OtherUses.clear(); + break; + } + + SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1); + if (Op1.getNode() == BasePtr.getNode()) + std::swap(Op0, Op1); + assert(Op0.getNode() == BasePtr.getNode() && + "Use of ADD/SUB but not an operand"); + + if (!isa(Op1)) { + OtherUses.clear(); + break; + } + + // FIXME: In some cases, we can be smarter about this. + if (Op1.getValueType() != Offset.getValueType()) { + OtherUses.clear(); + break; + } + + OtherUses.push_back(Use); + } + + if (Swapped) + std::swap(BasePtr, Offset); + // Now check for #3 and #4. bool RealUse = false; @@ -6991,6 +7043,43 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Finally, since the node is now dead, remove it from the graph. DAG.DeleteNode(N); + if (Swapped) + std::swap(BasePtr, Offset); + + // Replace other uses of BasePtr that can be updated to use Ptr + for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) { + unsigned OffsetIdx = 1; + if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) + OffsetIdx = 0; + assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == + BasePtr.getNode() && "Expected BasePtr operand"); + + APInt OV = + cast(Offset)->getAPIntValue(); + if (AM == ISD::PRE_DEC) + OV = -OV; + + ConstantSDNode *CN = + cast(OtherUses[i]->getOperand(OffsetIdx)); + APInt CNV = CN->getAPIntValue(); + if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) + CNV += OV; + else + CNV -= OV; + + SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0); + SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0)); + if (OffsetIdx == 0) + std::swap(NewOp1, NewOp2); + + SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(), + OtherUses[i]->getDebugLoc(), + OtherUses[i]->getValueType(0), NewOp1, NewOp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); + removeFromWorkList(OtherUses[i]); + DAG.DeleteNode(OtherUses[i]); + } + // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); removeFromWorkList(Ptr.getNode()); diff --git a/llvm/test/CodeGen/PowerPC/stdux-constuse.ll b/llvm/test/CodeGen/PowerPC/stdux-constuse.ll new file mode 100644 index 0000000..e62d438 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/stdux-constuse.ll @@ -0,0 +1,47 @@ +; RUN: llc -mcpu=a2 -disable-lsr < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @test1(i64 %add, i64* %ptr) nounwind { +entry: + %p1 = getelementptr i64* %ptr, i64 144115188075855 + br label %for.cond2.preheader + +for.cond2.preheader: + %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ] + br label %for.body4 + +for.body4: + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000, %for.cond2.preheader ] + %i0 = phi i64* [ %p1, %for.cond2.preheader ], [ %i6, %for.body4 ] + %i6 = getelementptr i64* %i0, i64 400000 + %i7 = getelementptr i64* %i6, i64 300000 + %i8 = getelementptr i64* %i6, i64 200000 + %i9 = getelementptr i64* %i6, i64 100000 + store i64 %add, i64* %i6, align 32 + store i64 %add, i64* %i7, align 32 + store i64 %add, i64* %i8, align 32 + store i64 %add, i64* %i9, align 32 + %lsr.iv.next = add i32 %lsr.iv, -16 + %exitcond.15 = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.15, label %for.end, label %for.body4 + +; Make sure that we generate the most compact form of this loop with no +; unnecessary moves +; CHECK: @test1 +; CHECK: mtctr +; CHECK: stdux +; CHECK-NEXT: stdx +; CHECK-NEXT: stdx +; CHECK-NEXT: stdx +; CHECK-NEXT: bdnz + +for.end: + %inc9 = add nsw i32 %nl.018, 1 + %exitcond = icmp eq i32 %inc9, 400000 + br i1 %exitcond, label %for.end10, label %for.cond2.preheader + +for.end10: + ret i32 0 +} + -- 2.7.4