Follow up to r165072. Try a different approach: only move the load when it's going...

author Evan Cheng <evan.cheng@apple.com>

Fri, 5 Oct 2012 01:48:22 +0000 (01:48 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Fri, 5 Oct 2012 01:48:22 +0000 (01:48 +0000)
author Evan Cheng <evan.cheng@apple.com>
Fri, 5 Oct 2012 01:48:22 +0000 (01:48 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Fri, 5 Oct 2012 01:48:22 +0000 (01:48 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

index 98778c3..b79dd99 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -387,21 +387,12 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
    CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                               Load.getOperand(1), Load.getOperand(2));
  
-  bool IsGlued = Call.getOperand(0).getNode()->getGluedUser() == Call.getNode();
    unsigned NumOps = Call.getNode()->getNumOperands();
    Ops.clear();
    Ops.push_back(SDValue(Load.getNode(), 1));
    for (unsigned i = 1, e = NumOps; i != e; ++i)
      Ops.push_back(Call.getOperand(i));
-  if (!IsGlued)
-    CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
-  else
-    // If call's chain was glued to the call (tailcall), and now the load
-    // is moved between them. Remove the glue to avoid a cycle (where the
-    // call is glued to its old chain and the load is using the old chain
-    // as its new chain).
-    CurDAG->MorphNodeTo(Call.getNode(), Call.getOpcode(),
-                        Call.getNode()->getVTList(), &Ops[0], NumOps-1);
+  CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
  }
  
  /// isCalleeLoad - Return true if call address is a load and it can be
@@ -410,6 +401,10 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
  /// In the case of a tail call, there isn't a callseq node between the call
  /// chain and the load.
  static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
+  // The transformation is somewhat dangerous if the call's chain was glued to
+  // the call. After MoveBelowOrigChain, the load sits between the call and
+  // the chain, which can create a cycle if the load is not folded. So it is
+  // *really* important that we are sure the load will be folded.
    if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
      return false;
    LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
@@ -447,7 +442,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
  
      if (OptLevel != CodeGenOpt::None &&
          (N->getOpcode() == X86ISD::CALL ||
-         N->getOpcode() == X86ISD::TC_RETURN)) {
+         (N->getOpcode() == X86ISD::TC_RETURN &&
+          // Only do this if the load can be folded into TC_RETURN.
+          (Subtarget->is64Bit() ||
+           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
        /// Also try moving call address load from outside callseq_start to just
        /// before the call to allow it to be folded.
        ///
diff --git a/llvm/test/CodeGen/X86/2012-10-02-DAGCycle.ll b/llvm/test/CodeGen/X86/2012-10-02-DAGCycle.ll

index 9d2b7ea..8d914db 100644 (file)
--- a/llvm/test/CodeGen/X86/2012-10-02-DAGCycle.ll
+++ b/llvm/test/CodeGen/X86/2012-10-02-DAGCycle.ll
@@ -1,4 +1,6 @@
  ; RUN: llc -mtriple=i386-apple-macosx -relocation-model=pic < %s
+; RUN: llc -mtriple=x86_64-apple-macosx -relocation-model=pic < %s
+
  ; rdar://12393897
  
  %TRp = type { i32, %TRH*, i32, i32 }
@@ -14,3 +16,37 @@ entry:
    %call = tail call i32 %1(%TRp* inreg %rp) nounwind optsize
    ret i32 %call
  }
+
+%btConeShape = type { %btConvexInternalShape, float, float, float, [3 x i32] }
+%btConvexInternalShape = type { %btConvexShape, %btVector, %btVector, float, float }
+%btConvexShape = type { %btCollisionShape }
+%btCollisionShape = type { i32 (...)**, i32, i8* }
+%btVector = type { [4 x float] }
+
+define { <2 x float>, <2 x float> } @t2(%btConeShape* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+  %0 = getelementptr inbounds %btConeShape* %this, i64 0, i32 0
+  br i1 undef, label %if.then, label %if.end17
+
+if.then:                                          ; preds = %entry
+  %vecnorm.sroa.2.8.copyload = load float* undef, align 4
+  %cmp4 = fcmp olt float undef, 0x3D10000000000000
+  %vecnorm.sroa.2.8.copyload36 = select i1 %cmp4, float -1.000000e+00, float %vecnorm.sroa.2.8.copyload
+  %call.i.i.i = tail call float @sqrtf(float 0.000000e+00) nounwind readnone
+  %div.i.i = fdiv float 1.000000e+00, %call.i.i.i
+  %mul7.i.i.i = fmul float %div.i.i, %vecnorm.sroa.2.8.copyload36
+  %1 = load float (%btConvexInternalShape*)** undef, align 8
+  %call12 = tail call float %1(%btConvexInternalShape* %0)
+  %mul7.i.i = fmul float %call12, %mul7.i.i.i
+  %retval.sroa.0.4.insert = insertelement <2 x float> zeroinitializer, float undef, i32 1
+  %add13.i = fadd float undef, %mul7.i.i
+  %retval.sroa.1.8.insert = insertelement <2 x float> undef, float %add13.i, i32 0
+  br label %if.end17
+
+if.end17:                                         ; preds = %if.then, %entry
+  %retval.sroa.1.8.load3338 = phi <2 x float> [ %retval.sroa.1.8.insert, %if.then ], [ undef, %entry ]
+  %retval.sroa.0.0.load3137 = phi <2 x float> [ %retval.sroa.0.4.insert, %if.then ], [ undef, %entry ]
+  ret { <2 x float>, <2 x float> } undef
+}
+
+declare float @sqrtf(float) nounwind readnone
author	Evan Cheng <evan.cheng@apple.com>
	Fri, 5 Oct 2012 01:48:22 +0000 (01:48 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Fri, 5 Oct 2012 01:48:22 +0000 (01:48 +0000)
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/2012-10-02-DAGCycle.ll		patch \| blob \| history