DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
+/// \brief Analyze profitability of direct move
+/// prefer float load to int load plus direct move
+/// when there is no integer use of int load
+static bool directMoveIsProfitable(const SDValue &Op) {
+ SDNode *Origin = Op.getOperand(0).getNode();
+ if (Origin->getOpcode() != ISD::LOAD)
+ return true;
+
+ for (SDNode::use_iterator UI = Origin->use_begin(),
+ UE = Origin->use_end();
+ UI != UE; ++UI) {
+
+ // Only look at the users of the loaded value.
+ if (UI.getUse().get().getResNo() != 0)
+ continue;
+
+ if (UI->getOpcode() != ISD::SINT_TO_FP &&
+ UI->getOpcode() != ISD::UINT_TO_FP)
+ return true;
+ }
+
+ return false;
+}
+
/// \brief Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
// If we have direct moves, we can do all the conversion, skip the store/load
// however, without FPCVT we can't do most conversions.
- if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+ if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
+ Subtarget.isPPC64() && Subtarget.hasFPCVT())
return LowerINT_TO_FPDirectMove(Op, DAG, dl);
assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
--- /dev/null
+; RUN: llc -O2 -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; Function Attrs: norecurse nounwind
+define void @test1(float* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readnone %c, i32 signext %n) #0 {
+
+; CHECK-LABEL: test1
+
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
+ %conv = sitofp i32 %0 to float
+ %mul = fmul float %conv, 0x4002916880000000
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
+ store float %mul, float* %arrayidx2, align 4, !tbaa !5
+ ret void
+
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: xscvsxdsp {{.*}}, [[REG]]
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define float @test2(i32* nocapture readonly %b) #0 {
+
+; CHECK-LABEL: test2
+
+entry:
+ %0 = load i32, i32* %b, align 4, !tbaa !1
+ %conv = sitofp i32 %0 to float
+ %mul = fmul float %conv, 0x40030A3D80000000
+ ret float %mul
+
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: xscvsxdsp {{.*}}, [[REG]]
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind
+define void @test3(float* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32 signext %n) #0 {
+
+; CHECK-LABEL: test3
+
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
+ %conv = sitofp i32 %0 to float
+ %mul = fmul float %conv, 0x4002916880000000
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
+ store float %mul, float* %arrayidx2, align 4, !tbaa !5
+ %arrayidx6 = getelementptr inbounds i32, i32* %c, i64 %idxprom
+ %1 = load i32, i32* %arrayidx6, align 4, !tbaa !1
+ %add = add nsw i32 %1, %0
+ store i32 %add, i32* %arrayidx6, align 4, !tbaa !1
+ ret void
+
+; CHECK: mtvsrwa
+; CHECK: blr
+
+}
+
+!0 = !{!"clang version 3.9.0 (http://llvm.org/git/clang.git b88a395e7ba26c0fb96cd99a2a004d76f4f41d0c) (http://llvm.org/git/llvm.git 1ac3fbac0f5b037c17c0b0f9d271c32c4d7ca1b5)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"float", !3, i64 0}