[PowerPC] Fix the PPCInstrInfo::getInstrLatency implementation

author Hal Finkel <hfinkel@anl.gov>

Tue, 14 Jul 2015 20:02:02 +0000 (20:02 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Tue, 14 Jul 2015 20:02:02 +0000 (20:02 +0000)
author Hal Finkel <hfinkel@anl.gov>
Tue, 14 Jul 2015 20:02:02 +0000 (20:02 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Tue, 14 Jul 2015 20:02:02 +0000 (20:02 +0000)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

index 696a838..bf6e402 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -57,6 +57,10 @@ static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
  cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
  cl::Hidden);
  
+static cl::opt<bool>
+UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
+  cl::desc("Use the old (incorrect) instruction latency calculation"));
+
  // Pin the vtable to this file.
  void PPCInstrInfo::anchor() {}
  
@@ -103,6 +107,35 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
    return new ScoreboardHazardRecognizer(II, DAG);
  }
  
+unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                       const MachineInstr *MI,
+                                       unsigned *PredCost) const {
+  if (!ItinData || UseOldLatencyCalc)
+    return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
+
+  // The default implementation of getInstrLatency calls getStageLatency, but
+  // getStageLatency does not do the right thing for us. While we have
+  // itinerary, most cores are fully pipelined, and so the itineraries only
+  // express the first part of the pipeline, not every stage. Instead, we need
+  // to use the listed output operand cycle number (using operand 0 here, which
+  // is an output).
+
+  unsigned Latency = 1;
+  unsigned DefClass = MI->getDesc().getSchedClass();
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
+      continue;
+
+    int Cycle = ItinData->getOperandCycle(DefClass, i);
+    if (Cycle < 0)
+      continue;
+
+    Latency = std::max(Latency, (unsigned) Cycle);
+  }
+
+  return Latency;
+}
  
  int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                      const MachineInstr *DefMI, unsigned DefIdx,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h

index e2d6346..40badae 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -95,6 +95,10 @@ public:
    CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                       const ScheduleDAG *DAG) const override;
  
+  unsigned getInstrLatency(const InstrItineraryData *ItinData,
+                           const MachineInstr *MI,
+                           unsigned *PredCost = nullptr) const override;
+
    int getOperandLatency(const InstrItineraryData *ItinData,
                          const MachineInstr *DefMI, unsigned DefIdx,
                          const MachineInstr *UseMI,
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/llvm/lib/Target/PowerPC/PPCScheduleP7.td

index 635d154..267f567 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -315,6 +315,10 @@ def P7Itineraries : ProcessorItineraries<
                                                    P7_DU3, P7_DU4], 0>,
                                     InstrStage<1, [P7_VS1, P7_VS2]>],
                                    [5, 1, 1]>,
+  InstrItinData<IIC_FPAddSub    , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_VS1, P7_VS2]>],
+                                  [5, 1, 1]>,
    InstrItinData<IIC_FPCompare   , [InstrStage<1, [P7_DU1, P7_DU2,
                                                    P7_DU3, P7_DU4], 0>,
                                     InstrStage<1, [P7_VS1, P7_VS2]>],
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/llvm/lib/Target/PowerPC/PPCScheduleP8.td

index 020739b..69e6d05 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -323,6 +323,10 @@ def P8Itineraries : ProcessorItineraries<
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [5, 1, 1]>,
+  InstrItinData<IIC_FPAddSub    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
+                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                                  [5, 1, 1]>,
    InstrItinData<IIC_FPCompare   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp

index f352fa6..58d3c3d 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -136,6 +136,16 @@ protected:
          // source of the copy, it must still be live here.  We can't use
          // interval testing for a physical register, so as long as we're
          // walking the MIs we may as well test liveness here.
+        //
+        // FIXME: There is a case that occurs in practice, like this:
+        //   %vreg9<def> = COPY %F1; VSSRC:%vreg9
+        //   ...
+        //   %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9
+        //   %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9
+        //   %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC:
+        //   %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC:
+        //   %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC:
+        // which prevents an otherwise-profitable transformation.
          bool OtherUsers = false, KillsAddendSrc = false;
          for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
               J != JE; --J) {
diff --git a/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll b/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll

index 88648df..c69f300 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
@@ -15,8 +15,8 @@ entry:
  ; CHECK-DAG: cmplwi {{[0-9]+}}, 3, 0
  ; CHECK-DAG: li [[REG2:[0-9]+]], 1
  ; CHECK-DAG: cntlzw [[REG3:[0-9]+]],
-; CHECK: isel 3, 0, [[REG2]]
-; CHECK: and 3, 3, [[REG3]]
+; CHECK: isel [[REG4:[0-9]+]], 0, [[REG2]]
+; CHECK: and 3, [[REG4]], [[REG3]]
  ; CHECK: blr
  }
  
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll b/llvm/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll

index f905198..92d6d55 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll
@@ -35,7 +35,7 @@ define fastcc double @f2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, doub
  }
  
  define void @cg2(i64 %v) #0 {
-  tail call fastcc i64 @g1(i64 0, double 0.0, i64 %v, double 0.0, i64 0, double 0.0, i64 0, double 0.0)
+  call fastcc i64 @g1(i64 0, double 0.0, i64 %v, double 0.0, i64 0, double 0.0, i64 0, double 0.0)
    ret void
  
  ; CHECK-LABEL: @cg2
@@ -44,11 +44,11 @@ define void @cg2(i64 %v) #0 {
  }
  
  define void @cf2(double %v) #0 {
-  tail call fastcc i64 @g1(i64 0, double 0.0, i64 0, double %v, i64 0, double 0.0, i64 0, double 0.0)
+  call fastcc i64 @g1(i64 0, double 0.0, i64 0, double %v, i64 0, double 0.0, i64 0, double 0.0)
    ret void
  
  ; CHECK-LABEL: @cf2
-; CHECK: mr 2, 1
+; CHECK: fmr 2, 1
  ; CHECK: blr
  }
  
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-fastcc.ll b/llvm/test/CodeGen/PowerPC/ppc64-fastcc.ll

index bb1365a..69e15d1 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/ppc64-fastcc.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-fastcc.ll
@@ -521,8 +521,9 @@ define void @cv13(<4 x i32> %v) #0 {
    ret void
  
  ; CHECK-LABEL: @cv13
-; CHECK: li [[REG1:[0-9]+]], 96
-; CHECK: stvx 2, 1, [[REG1]]
+; CHECK-DAG: li [[REG1:[0-9]+]], 96
+; CHECK-DAG: vor [[REG2:[0-9]+]], 2, 2
+; CHECK: stvx [[REG2]], 1, [[REG1]]
  ; CHECK: blr
  }
  
@@ -531,8 +532,9 @@ define void @cv14(<4 x i32> %v) #0 {
    ret void
  
  ; CHECK-LABEL: @cv14
-; CHECK: li [[REG1:[0-9]+]], 128
-; CHECK: stvx 2, 1, [[REG1]]
+; CHECK-DAG: li [[REG1:[0-9]+]], 128
+; CHECK-DAG: vor [[REG2:[0-9]+]], 2, 2
+; CHECK: stvx [[REG2]], 1, [[REG1]]
  ; CHECK: blr
  }
  
diff --git a/llvm/test/CodeGen/PowerPC/sjlj.ll b/llvm/test/CodeGen/PowerPC/sjlj.ll

index 62403e7..dcbdd69 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/sjlj.ll
+++ b/llvm/test/CodeGen/PowerPC/sjlj.ll
@@ -18,10 +18,10 @@ entry:
  ; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
  ; CHECK: ld 31, 0([[REG]])
  ; CHECK: ld [[REG2:[0-9]+]], 8([[REG]])
-; CHECK: ld 1, 16([[REG]])
-; CHECK: mtctr [[REG2]]
-; CHECK: ld 30, 32([[REG]])
-; CHECK: ld 2, 24([[REG]])
+; CHECK-DAG: ld 1, 16([[REG]])
+; CHECK-DAG: mtctr [[REG2]]
+; CHECK-DAG: ld 30, 32([[REG]])
+; CHECK-DAG: ld 2, 24([[REG]])
  ; CHECK: bctr
  
  return:                                           ; No predecessors!
diff --git a/llvm/test/CodeGen/PowerPC/tls-store2.ll b/llvm/test/CodeGen/PowerPC/tls-store2.ll

index e9aa17e..6495086 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/tls-store2.ll
+++ b/llvm/test/CodeGen/PowerPC/tls-store2.ll
@@ -29,6 +29,8 @@ entry:
  ; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
  ; CHECK: bl __tls_get_addr(__once_call@tlsgd)
  ; CHECK-NEXT: nop
-; CHECK: std {{[0-9]+}}, 0(3)
+; FIXME: We don't really need the copy here either, we could move the store up.
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std {{[0-9]+}}, 0([[REG1]])
  
  declare void @__once_call_impl()
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-m.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-m.ll

index d859273..4f556b6 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-m.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m.ll
@@ -49,12 +49,13 @@ entry:
  ; CHECK-LABEL: @test2
  ; CHECK-DAG: li [[C1:[0-9]+]], 8
  ; CHECK-DAG: li [[C2:[0-9]+]], 16
-; CHECK-DAG: xsmaddmdp 3, 2, 1
-; CHECK-DAG: xsmaddmdp 4, 2, 1
-; CHECK-DAG: xsmaddadp 1, 2, 5
-; CHECK-DAG: stxsdx 3, 0, 8
-; CHECK-DAG: stxsdx 4, 8, [[C1]]
-; CHECK-DAG: stxsdx 1, 8, [[C2]]
+; FIXME: We no longer get this because of copy ordering at the MI level.
+; CHECX-DAG: xsmaddmdp 3, 2, 1
+; CHECX-DAG: xsmaddmdp 4, 2, 1
+; CHECX-DAG: xsmaddadp 1, 2, 5
+; CHECX-DAG: stxsdx 3, 0, 8
+; CHECX-DAG: stxsdx 4, 8, [[C1]]
+; CHECX-DAG: stxsdx 1, 8, [[C2]]
  ; CHECK: blr
  
  ; CHECK-FISL-LABEL: @test2
@@ -213,14 +214,15 @@ entry:
    ret void
  
  ; CHECK-LABEL: @testv2
-; CHECK-DAG: xvmaddmdp 36, 35, 34
-; CHECK-DAG: xvmaddmdp 37, 35, 34
-; CHECK-DAG: li [[C1:[0-9]+]], 16
-; CHECK-DAG: li [[C2:[0-9]+]], 32
-; CHECK-DAG: xvmaddadp 34, 35, 38
-; CHECK-DAG: stxvd2x 36, 0, 3
-; CHECK-DAG: stxvd2x 37, 3, [[C1:[0-9]+]]
-; CHECK-DAG: stxvd2x 34, 3, [[C2:[0-9]+]]
+; FIXME: We currently don't get this because of copy ordering on the MI level.
+; CHECX-DAG: xvmaddmdp 36, 35, 34
+; CHECX-DAG: xvmaddmdp 37, 35, 34
+; CHECX-DAG: li [[C1:[0-9]+]], 16
+; CHECX-DAG: li [[C2:[0-9]+]], 32
+; CHECX-DAG: xvmaddadp 34, 35, 38
+; CHECX-DAG: stxvd2x 36, 0, 3
+; CHECX-DAG: stxvd2x 37, 3, [[C1:[0-9]+]]
+; CHECX-DAG: stxvd2x 34, 3, [[C2:[0-9]+]]
  ; CHECK: blr
  
  ; CHECK-FISL-LABEL: @testv2
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-sp.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-sp.ll

index 1c3e457..b4dd2e1 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-sp.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-sp.ll
@@ -42,12 +42,13 @@ entry:
  ; CHECK-LABEL: @test2sp
  ; CHECK-DAG: li [[C1:[0-9]+]], 4
  ; CHECK-DAG: li [[C2:[0-9]+]], 8
-; CHECK-DAG: xsmaddmsp 3, 2, 1
-; CHECK-DAG: xsmaddmsp 4, 2, 1
-; CHECK-DAG: xsmaddasp 1, 2, 5
-; CHECK-DAG: stxsspx 3, 0, 8
-; CHECK-DAG: stxsspx 4, 8, [[C1]]
-; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; FIXME: We now miss this because of copy ordering at the MI level.
+; CHECX-DAG: xsmaddmsp 3, 2, 1
+; CHECX-DAG: xsmaddmsp 4, 2, 1
+; CHECX-DAG: xsmaddasp 1, 2, 5
+; CHECX-DAG: stxsspx 3, 0, 8
+; CHECX-DAG: stxsspx 4, 8, [[C1]]
+; CHECX-DAG: stxsspx 1, 8, [[C2]]
  ; CHECK: blr
  
  ; CHECK-FISL-LABEL: @test2sp
author	Hal Finkel <hfinkel@anl.gov>
	Tue, 14 Jul 2015 20:02:02 +0000 (20:02 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Tue, 14 Jul 2015 20:02:02 +0000 (20:02 +0000)
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCInstrInfo.h		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCScheduleP7.td		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCScheduleP8.td		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp		patch \| blob \| history
llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll		patch \| blob \| history
llvm/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll		patch \| blob \| history
llvm/test/CodeGen/PowerPC/ppc64-fastcc.ll		patch \| blob \| history
llvm/test/CodeGen/PowerPC/sjlj.ll		patch \| blob \| history
llvm/test/CodeGen/PowerPC/tls-store2.ll		patch \| blob \| history
llvm/test/CodeGen/PowerPC/vsx-fma-m.ll		patch \| blob \| history
llvm/test/CodeGen/PowerPC/vsx-fma-sp.ll		patch \| blob \| history