From 0c122d5a41cd038b8e29a0d8d671257938da1633 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Thu, 19 Jul 2018 19:34:18 +0000 Subject: [PATCH] [Power9] Code Cleanup - Remove needsAggressiveScheduling() As we already return true from needsAggressiveScheduling() for the most recent hardware it would be cleaner to just return true for all PowerPC hardware. Differential Revision: https://reviews.llvm.org/D48663 llvm-svn: 337488 --- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 35 +-- llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll | 252 ++++++++++----------- .../CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll | 2 +- .../test/CodeGen/PowerPC/MergeConsecutiveStores.ll | 16 +- llvm/test/CodeGen/PowerPC/coalesce-ext.ll | 4 +- llvm/test/CodeGen/PowerPC/i1-to-double.ll | 10 +- llvm/test/CodeGen/PowerPC/lsa.ll | 4 +- llvm/test/CodeGen/PowerPC/ppc32-vacopy.ll | 13 +- llvm/test/CodeGen/PowerPC/ppc64-gep-opt.ll | 29 --- llvm/test/CodeGen/PowerPC/save-cr-ppc32svr4.ll | 11 +- llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll | 11 +- llvm/test/CodeGen/PowerPC/tls.ll | 2 +- llvm/test/CodeGen/PowerPC/unal-altivec.ll | 9 +- 13 files changed, 172 insertions(+), 226 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 1b5f43c..c0cbfd7 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -175,27 +175,8 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const { return false; } -// Embedded cores need aggressive scheduling (and some others also benefit). -static bool needsAggressiveScheduling(unsigned Directive) { - switch (Directive) { - default: return false; - case PPC::DIR_440: - case PPC::DIR_A2: - case PPC::DIR_E500mc: - case PPC::DIR_E5500: - case PPC::DIR_PWR7: - case PPC::DIR_PWR8: - // FIXME: Same as P8 until POWER9 scheduling info is available - case PPC::DIR_PWR9: - return true; - } -} - bool PPCSubtarget::enableMachineScheduler() const { - // Enable MI scheduling for the embedded cores. - // FIXME: Enable this for all cores (some additional modeling - // may be necessary). - return needsAggressiveScheduling(DarwinDirective); + return true; } // This overrides the PostRAScheduler bit in the SchedModel for each CPU. @@ -213,19 +194,19 @@ void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const { - if (needsAggressiveScheduling(DarwinDirective)) { - Policy.OnlyTopDown = false; - Policy.OnlyBottomUp = false; - } - + // The GenericScheduler that we use defaults to scheduling bottom up only. + // We want to schedule from both the top and the bottom and so we set + // OnlyBottomUp to false. + // We want to do bi-directional scheduling since it provides a more balanced + // schedule leading to better performance. + Policy.OnlyBottomUp = false; // Spilling is generally expensive on all PPC cores, so always enable // register-pressure tracking. Policy.ShouldTrackPressure = true; } bool PPCSubtarget::useAA() const { - // Use AA during code generation for the embedded cores. - return needsAggressiveScheduling(DarwinDirective); + return true; } bool PPCSubtarget::enableSubRegLiveness() const { diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll index 442580c..4343812 100644 --- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -8,54 +8,54 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stw 0, 4(1) ; CHECK-NEXT: stwu 1, -464(1) ; CHECK-NEXT: mfcr 12 -; CHECK-NEXT: lis 3, .LCPI0_0@ha ; CHECK-NEXT: stw 29, 412(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill +; CHECK-NEXT: lis 3, .LCPI0_0@ha ; CHECK-NEXT: stw 12, 408(1) ; CHECK-NEXT: stfd 27, 424(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 28, 432(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 29, 440(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 30, 448(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 31, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) ; CHECK-NEXT: stfd 2, 376(1) +; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) ; CHECK-NEXT: stfd 1, 384(1) +; CHECK-NEXT: lwz 4, 380(1) ; CHECK-NEXT: fcmpu 0, 2, 27 -; CHECK-NEXT: lwz 3, 380(1) -; CHECK-NEXT: lwz 4, 376(1) -; CHECK-NEXT: lwz 5, 388(1) -; CHECK-NEXT: lwz 6, 384(1) ; CHECK-NEXT: fcmpu 1, 1, 27 ; CHECK-NEXT: crand 20, 6, 0 +; CHECK-NEXT: stw 4, 396(1) ; CHECK-NEXT: cror 20, 4, 20 -; CHECK-NEXT: stw 3, 396(1) +; CHECK-NEXT: lwz 4, 376(1) ; CHECK-NEXT: stw 4, 392(1) -; CHECK-NEXT: stw 5, 404(1) -; CHECK-NEXT: stw 6, 400(1) +; CHECK-NEXT: lwz 4, 388(1) +; CHECK-NEXT: stw 4, 404(1) +; CHECK-NEXT: lwz 3, 384(1) +; CHECK-NEXT: stw 3, 400(1) ; CHECK-NEXT: bc 4, 20, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb5 ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: b .LBB0_16 ; CHECK-NEXT: .LBB0_2: # %bb1 -; CHECK-NEXT: lfd 0, 392(1) -; CHECK-NEXT: lfd 1, 400(1) +; CHECK-NEXT: lfd 0, 400(1) +; CHECK-NEXT: lfd 1, 392(1) ; CHECK-NEXT: li 29, 0 ; CHECK-NEXT: lis 3, 15856 -; CHECK-NEXT: stfd 1, 304(1) -; CHECK-NEXT: stfd 0, 296(1) -; CHECK-NEXT: lwz 4, 308(1) -; CHECK-NEXT: lwz 5, 304(1) -; CHECK-NEXT: lwz 6, 300(1) -; CHECK-NEXT: lwz 7, 296(1) +; CHECK-NEXT: stfd 0, 304(1) +; CHECK-NEXT: stfd 1, 296(1) ; CHECK-NEXT: stw 29, 340(1) ; CHECK-NEXT: stw 3, 336(1) ; CHECK-NEXT: stw 29, 332(1) ; CHECK-NEXT: stw 29, 328(1) -; CHECK-NEXT: stw 4, 324(1) -; CHECK-NEXT: stw 5, 320(1) -; CHECK-NEXT: stw 6, 316(1) -; CHECK-NEXT: stw 7, 312(1) +; CHECK-NEXT: lwz 3, 308(1) +; CHECK-NEXT: stw 3, 324(1) +; CHECK-NEXT: lwz 3, 304(1) +; CHECK-NEXT: stw 3, 320(1) +; CHECK-NEXT: lwz 3, 300(1) +; CHECK-NEXT: stw 3, 316(1) +; CHECK-NEXT: lwz 3, 296(1) +; CHECK-NEXT: stw 3, 312(1) ; CHECK-NEXT: lfd 31, 320(1) ; CHECK-NEXT: lfd 30, 312(1) ; CHECK-NEXT: lfd 3, 336(1) @@ -63,52 +63,52 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: bl __gcc_qmul@PLT -; CHECK-NEXT: stfd 1, 280(1) -; CHECK-NEXT: lis 3, .LCPI0_1@ha -; CHECK-NEXT: stfd 2, 288(1) -; CHECK-NEXT: lfs 0, .LCPI0_1@l(3) ; CHECK-NEXT: lis 3, 16864 -; CHECK-NEXT: lwz 4, 284(1) -; CHECK-NEXT: lwz 5, 280(1) -; CHECK-NEXT: lwz 6, 292(1) -; CHECK-NEXT: lwz 7, 288(1) +; CHECK-NEXT: stfd 1, 280(1) ; CHECK-NEXT: fmr 29, 1 +; CHECK-NEXT: stfd 2, 288(1) +; CHECK-NEXT: fmr 28, 2 ; CHECK-NEXT: stw 29, 372(1) ; CHECK-NEXT: stw 3, 368(1) -; CHECK-NEXT: fmr 28, 2 ; CHECK-NEXT: stw 29, 364(1) ; CHECK-NEXT: stw 29, 360(1) -; CHECK-NEXT: fcmpu 0, 2, 27 -; CHECK-NEXT: stw 4, 356(1) -; CHECK-NEXT: stw 5, 352(1) -; CHECK-NEXT: fcmpu 1, 1, 0 -; CHECK-NEXT: stw 6, 348(1) -; CHECK-NEXT: stw 7, 344(1) -; CHECK-NEXT: crandc 20, 6, 0 +; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: stw 3, 356(1) +; CHECK-NEXT: lwz 3, 280(1) +; CHECK-NEXT: stw 3, 352(1) +; CHECK-NEXT: lwz 3, 292(1) +; CHECK-NEXT: stw 3, 348(1) +; CHECK-NEXT: lwz 3, 288(1) +; CHECK-NEXT: stw 3, 344(1) ; CHECK-NEXT: lfd 3, 368(1) ; CHECK-NEXT: lfd 4, 360(1) ; CHECK-NEXT: lfd 1, 352(1) ; CHECK-NEXT: lfd 2, 344(1) -; CHECK-NEXT: cror 8, 5, 20 ; CHECK-NEXT: bl __gcc_qsub@PLT ; CHECK-NEXT: mffs 0 ; CHECK-NEXT: mtfsb1 31 +; CHECK-NEXT: lis 3, .LCPI0_1@ha +; CHECK-NEXT: fcmpu 0, 28, 27 ; CHECK-NEXT: mtfsb0 30 ; CHECK-NEXT: fadd 1, 2, 1 ; CHECK-NEXT: mtfsf 1, 0 -; CHECK-NEXT: mffs 0 -; CHECK-NEXT: mtfsb1 31 -; CHECK-NEXT: mtfsb0 30 -; CHECK-NEXT: fadd 2, 28, 29 -; CHECK-NEXT: mtfsf 1, 0 ; CHECK-NEXT: fctiwz 0, 1 -; CHECK-NEXT: fctiwz 1, 2 +; CHECK-NEXT: mffs 1 ; CHECK-NEXT: stfd 0, 160(1) -; CHECK-NEXT: stfd 1, 152(1) +; CHECK-NEXT: mtfsb1 31 +; CHECK-NEXT: mtfsb0 30 +; CHECK-NEXT: fadd 0, 28, 29 +; CHECK-NEXT: mtfsf 1, 1 +; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) +; CHECK-NEXT: fctiwz 0, 0 +; CHECK-NEXT: stfd 0, 152(1) +; CHECK-NEXT: fcmpu 1, 29, 1 ; CHECK-NEXT: lwz 3, 164(1) ; CHECK-NEXT: lwz 4, 156(1) +; CHECK-NEXT: crandc 20, 6, 0 +; CHECK-NEXT: cror 20, 5, 20 ; CHECK-NEXT: addis 3, 3, -32768 -; CHECK-NEXT: bc 12, 8, .LBB0_4 +; CHECK-NEXT: bc 12, 20, .LBB0_4 ; CHECK-NEXT: # %bb.3: # %bb1 ; CHECK-NEXT: ori 30, 4, 0 ; CHECK-NEXT: b .LBB0_5 @@ -118,24 +118,24 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: mr 3, 30 ; CHECK-NEXT: bl __floatditf@PLT -; CHECK-NEXT: stfd 1, 208(1) ; CHECK-NEXT: lis 3, 17392 +; CHECK-NEXT: stfd 1, 208(1) +; CHECK-NEXT: fmr 29, 1 ; CHECK-NEXT: stfd 2, 200(1) -; CHECK-NEXT: fmr 28, 1 -; CHECK-NEXT: lwz 4, 212(1) -; CHECK-NEXT: lwz 5, 208(1) -; CHECK-NEXT: lwz 6, 204(1) -; CHECK-NEXT: lwz 7, 200(1) -; CHECK-NEXT: fmr 29, 2 +; CHECK-NEXT: fmr 28, 2 ; CHECK-NEXT: stw 29, 244(1) ; CHECK-NEXT: stw 3, 240(1) ; CHECK-NEXT: cmpwi 2, 30, 0 ; CHECK-NEXT: stw 29, 236(1) ; CHECK-NEXT: stw 29, 232(1) -; CHECK-NEXT: stw 4, 228(1) -; CHECK-NEXT: stw 5, 224(1) -; CHECK-NEXT: stw 6, 220(1) -; CHECK-NEXT: stw 7, 216(1) +; CHECK-NEXT: lwz 3, 212(1) +; CHECK-NEXT: stw 3, 228(1) +; CHECK-NEXT: lwz 3, 208(1) +; CHECK-NEXT: stw 3, 224(1) +; CHECK-NEXT: lwz 3, 204(1) +; CHECK-NEXT: stw 3, 220(1) +; CHECK-NEXT: lwz 3, 200(1) +; CHECK-NEXT: stw 3, 216(1) ; CHECK-NEXT: lfd 3, 240(1) ; CHECK-NEXT: lfd 4, 232(1) ; CHECK-NEXT: lfd 1, 224(1) @@ -143,24 +143,24 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: bl __gcc_qadd@PLT ; CHECK-NEXT: blt 2, .LBB0_7 ; CHECK-NEXT: # %bb.6: # %bb1 -; CHECK-NEXT: fmr 1, 28 +; CHECK-NEXT: fmr 2, 28 ; CHECK-NEXT: .LBB0_7: # %bb1 -; CHECK-NEXT: stfd 1, 184(1) ; CHECK-NEXT: blt 2, .LBB0_9 ; CHECK-NEXT: # %bb.8: # %bb1 -; CHECK-NEXT: fmr 2, 29 +; CHECK-NEXT: fmr 1, 29 ; CHECK-NEXT: .LBB0_9: # %bb1 -; CHECK-NEXT: stfd 2, 192(1) +; CHECK-NEXT: stfd 1, 184(1) ; CHECK-NEXT: fmr 1, 31 -; CHECK-NEXT: lwz 3, 188(1) -; CHECK-NEXT: lwz 4, 184(1) -; CHECK-NEXT: lwz 5, 196(1) -; CHECK-NEXT: lwz 6, 192(1) +; CHECK-NEXT: stfd 2, 192(1) ; CHECK-NEXT: fmr 2, 30 +; CHECK-NEXT: lwz 3, 188(1) ; CHECK-NEXT: stw 3, 260(1) -; CHECK-NEXT: stw 4, 256(1) -; CHECK-NEXT: stw 5, 252(1) -; CHECK-NEXT: stw 6, 248(1) +; CHECK-NEXT: lwz 3, 184(1) +; CHECK-NEXT: stw 3, 256(1) +; CHECK-NEXT: lwz 3, 196(1) +; CHECK-NEXT: stw 3, 252(1) +; CHECK-NEXT: lwz 3, 192(1) +; CHECK-NEXT: stw 3, 248(1) ; CHECK-NEXT: lfd 3, 256(1) ; CHECK-NEXT: lfd 4, 248(1) ; CHECK-NEXT: bl __gcc_qsub@PLT @@ -169,67 +169,67 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: fcmpu 1, 1, 27 ; CHECK-NEXT: lwz 3, 180(1) -; CHECK-NEXT: lwz 4, 176(1) -; CHECK-NEXT: lwz 5, 172(1) -; CHECK-NEXT: lwz 6, 168(1) ; CHECK-NEXT: crandc 20, 6, 0 -; CHECK-NEXT: stw 3, 268(1) -; CHECK-NEXT: stw 4, 264(1) ; CHECK-NEXT: cror 21, 5, 7 -; CHECK-NEXT: stw 5, 276(1) -; CHECK-NEXT: stw 6, 272(1) ; CHECK-NEXT: cror 20, 21, 20 +; CHECK-NEXT: stw 3, 268(1) +; CHECK-NEXT: lwz 3, 176(1) +; CHECK-NEXT: stw 3, 264(1) +; CHECK-NEXT: lwz 3, 172(1) +; CHECK-NEXT: stw 3, 276(1) +; CHECK-NEXT: lwz 3, 168(1) +; CHECK-NEXT: stw 3, 272(1) ; CHECK-NEXT: lfd 30, 264(1) ; CHECK-NEXT: lfd 31, 272(1) ; CHECK-NEXT: bc 12, 20, .LBB0_13 ; CHECK-NEXT: # %bb.10: # %bb2 -; CHECK-NEXT: fneg 29, 31 -; CHECK-NEXT: fneg 28, 30 -; CHECK-NEXT: stfd 29, 48(1) +; CHECK-NEXT: fneg 29, 30 +; CHECK-NEXT: fneg 28, 31 ; CHECK-NEXT: li 29, 0 -; CHECK-NEXT: stfd 28, 40(1) ; CHECK-NEXT: lis 3, 16864 -; CHECK-NEXT: lwz 4, 52(1) -; CHECK-NEXT: lwz 5, 48(1) -; CHECK-NEXT: lwz 6, 44(1) -; CHECK-NEXT: lwz 7, 40(1) +; CHECK-NEXT: stfd 28, 48(1) +; CHECK-NEXT: stfd 29, 40(1) ; CHECK-NEXT: stw 29, 84(1) ; CHECK-NEXT: stw 3, 80(1) ; CHECK-NEXT: stw 29, 76(1) ; CHECK-NEXT: stw 29, 72(1) -; CHECK-NEXT: stw 4, 68(1) -; CHECK-NEXT: stw 5, 64(1) -; CHECK-NEXT: stw 6, 60(1) -; CHECK-NEXT: stw 7, 56(1) +; CHECK-NEXT: lwz 3, 52(1) +; CHECK-NEXT: stw 3, 68(1) +; CHECK-NEXT: lwz 3, 48(1) +; CHECK-NEXT: stw 3, 64(1) +; CHECK-NEXT: lwz 3, 44(1) +; CHECK-NEXT: stw 3, 60(1) +; CHECK-NEXT: lwz 3, 40(1) +; CHECK-NEXT: stw 3, 56(1) ; CHECK-NEXT: lfd 3, 80(1) ; CHECK-NEXT: lfd 4, 72(1) ; CHECK-NEXT: lfd 1, 64(1) ; CHECK-NEXT: lfd 2, 56(1) ; CHECK-NEXT: bl __gcc_qsub@PLT -; CHECK-NEXT: lis 3, .LCPI0_2@ha -; CHECK-NEXT: lfs 0, .LCPI0_2@l(3) -; CHECK-NEXT: lis 4, .LCPI0_3@ha -; CHECK-NEXT: lfs 3, .LCPI0_3@l(4) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: mffs 0 ; CHECK-NEXT: mtfsb1 31 -; CHECK-NEXT: fcmpu 1, 31, 3 -; CHECK-NEXT: crandc 20, 6, 1 +; CHECK-NEXT: lis 3, .LCPI0_2@ha ; CHECK-NEXT: mtfsb0 30 -; CHECK-NEXT: cror 20, 4, 20 ; CHECK-NEXT: fadd 1, 2, 1 ; CHECK-NEXT: mtfsf 1, 0 -; CHECK-NEXT: mffs 0 -; CHECK-NEXT: mtfsb1 31 -; CHECK-NEXT: mtfsb0 30 -; CHECK-NEXT: fadd 2, 28, 29 -; CHECK-NEXT: mtfsf 1, 0 ; CHECK-NEXT: fctiwz 0, 1 -; CHECK-NEXT: fctiwz 1, 2 +; CHECK-NEXT: mffs 1 ; CHECK-NEXT: stfd 0, 32(1) -; CHECK-NEXT: stfd 1, 24(1) +; CHECK-NEXT: mtfsb1 31 +; CHECK-NEXT: lfs 0, .LCPI0_2@l(3) +; CHECK-NEXT: lis 3, .LCPI0_3@ha +; CHECK-NEXT: mtfsb0 30 +; CHECK-NEXT: fadd 2, 29, 28 +; CHECK-NEXT: mtfsf 1, 1 +; CHECK-NEXT: lfs 1, .LCPI0_3@l(3) +; CHECK-NEXT: fcmpu 0, 30, 0 +; CHECK-NEXT: fctiwz 2, 2 +; CHECK-NEXT: stfd 2, 24(1) +; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 3, 36(1) ; CHECK-NEXT: lwz 4, 28(1) +; CHECK-NEXT: crandc 20, 6, 1 +; CHECK-NEXT: cror 20, 4, 20 ; CHECK-NEXT: addis 3, 3, -32768 ; CHECK-NEXT: bc 12, 20, .LBB0_12 ; CHECK-NEXT: # %bb.11: # %bb2 @@ -240,51 +240,51 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: subfe 3, 29, 30 ; CHECK-NEXT: b .LBB0_16 ; CHECK-NEXT: .LBB0_13: # %bb3 -; CHECK-NEXT: stfd 31, 112(1) ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: stfd 30, 104(1) ; CHECK-NEXT: lis 4, 16864 -; CHECK-NEXT: lwz 5, 116(1) -; CHECK-NEXT: lwz 6, 112(1) -; CHECK-NEXT: lwz 7, 108(1) -; CHECK-NEXT: lwz 8, 104(1) +; CHECK-NEXT: stfd 31, 112(1) +; CHECK-NEXT: stfd 30, 104(1) ; CHECK-NEXT: stw 3, 148(1) ; CHECK-NEXT: stw 4, 144(1) ; CHECK-NEXT: stw 3, 140(1) ; CHECK-NEXT: stw 3, 136(1) -; CHECK-NEXT: stw 5, 132(1) -; CHECK-NEXT: stw 6, 128(1) -; CHECK-NEXT: stw 7, 124(1) -; CHECK-NEXT: stw 8, 120(1) +; CHECK-NEXT: lwz 3, 116(1) +; CHECK-NEXT: stw 3, 132(1) +; CHECK-NEXT: lwz 3, 112(1) +; CHECK-NEXT: stw 3, 128(1) +; CHECK-NEXT: lwz 3, 108(1) +; CHECK-NEXT: stw 3, 124(1) +; CHECK-NEXT: lwz 3, 104(1) +; CHECK-NEXT: stw 3, 120(1) ; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: lfd 4, 136(1) ; CHECK-NEXT: lfd 1, 128(1) ; CHECK-NEXT: lfd 2, 120(1) ; CHECK-NEXT: bl __gcc_qsub@PLT -; CHECK-NEXT: lis 3, .LCPI0_0@ha -; CHECK-NEXT: lfs 0, .LCPI0_0@l(3) -; CHECK-NEXT: lis 4, .LCPI0_1@ha -; CHECK-NEXT: lfs 3, .LCPI0_1@l(4) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: mffs 0 ; CHECK-NEXT: mtfsb1 31 -; CHECK-NEXT: fcmpu 1, 31, 3 -; CHECK-NEXT: crandc 20, 6, 0 +; CHECK-NEXT: lis 3, .LCPI0_0@ha ; CHECK-NEXT: mtfsb0 30 -; CHECK-NEXT: cror 20, 5, 20 ; CHECK-NEXT: fadd 1, 2, 1 ; CHECK-NEXT: mtfsf 1, 0 -; CHECK-NEXT: mffs 0 +; CHECK-NEXT: fctiwz 0, 1 +; CHECK-NEXT: mffs 1 +; CHECK-NEXT: stfd 0, 96(1) ; CHECK-NEXT: mtfsb1 31 +; CHECK-NEXT: lfs 0, .LCPI0_0@l(3) +; CHECK-NEXT: lis 3, .LCPI0_1@ha ; CHECK-NEXT: mtfsb0 30 ; CHECK-NEXT: fadd 2, 30, 31 -; CHECK-NEXT: mtfsf 1, 0 -; CHECK-NEXT: fctiwz 0, 1 -; CHECK-NEXT: fctiwz 1, 2 -; CHECK-NEXT: stfd 0, 96(1) -; CHECK-NEXT: stfd 1, 88(1) +; CHECK-NEXT: mtfsf 1, 1 +; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) +; CHECK-NEXT: fcmpu 0, 30, 0 +; CHECK-NEXT: fctiwz 2, 2 +; CHECK-NEXT: stfd 2, 88(1) +; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 3, 100(1) ; CHECK-NEXT: lwz 4, 92(1) +; CHECK-NEXT: crandc 20, 6, 0 +; CHECK-NEXT: cror 20, 5, 20 ; CHECK-NEXT: addis 3, 3, -32768 ; CHECK-NEXT: bc 12, 20, .LBB0_14 ; CHECK-NEXT: b .LBB0_15 diff --git a/llvm/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/llvm/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll index 14a89b0..1488aa6 100644 --- a/llvm/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll +++ b/llvm/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll @@ -50,7 +50,7 @@ for.body4.us: ; preds = %for.body4.lr.ph.us, %2 = load float, float* %arrayidx.us, align 4 %arrayidx7.us = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv %3 = load float, float* %arrayidx7.us, align 4 - %add8.us = fadd float %3, %2 + %add8.us = tail call float asm "fadd $0, $1, $2", "=f,f,f,~{cr2}"(float %3, float %2) store float %add8.us, float* %arrayidx7.us, align 4 %indvars.iv.next = add i64 %indvars.iv, %1 %4 = trunc i64 %indvars.iv.next to i32 diff --git a/llvm/test/CodeGen/PowerPC/MergeConsecutiveStores.ll b/llvm/test/CodeGen/PowerPC/MergeConsecutiveStores.ll index c4f3427..4e4b3d4 100644 --- a/llvm/test/CodeGen/PowerPC/MergeConsecutiveStores.ll +++ b/llvm/test/CodeGen/PowerPC/MergeConsecutiveStores.ll @@ -23,14 +23,14 @@ ;; worthwhile. ;; CHECK-LABEL: f: -;; CHECK: lwzu -;; CHECK: stwu -;; CHECK-NEXT: lwz -;; CHECK-NEXT: lwz -;; CHECK-NEXT: lwz -;; CHECK-NEXT: stw -;; CHECK-NEXT: stw -;; CHECK-NEXT: stw +;; CHECK-DAG: lwzu +;; CHECK-DAG: stwu +;; CHECK-DAG: lwz +;; CHECK-DAG: lwz +;; CHECK-DAG: lwz +;; CHECK-DAG: stw +;; CHECK-DAG: stw +;; CHECK-DAG: stw ;; CHECK-NEXT: blr define void @f() { entry: diff --git a/llvm/test/CodeGen/PowerPC/coalesce-ext.ll b/llvm/test/CodeGen/PowerPC/coalesce-ext.ll index 253df9d..c3a1673 100644 --- a/llvm/test/CodeGen/PowerPC/coalesce-ext.ll +++ b/llvm/test/CodeGen/PowerPC/coalesce-ext.ll @@ -9,10 +9,10 @@ define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { %F = ashr i64 %E, 32 ; CHECK: extsw [[EXT:r[0-9]+]], [[SUM]] store volatile i64 %F, i64 *%P2 - ; CHECK: std [[EXT]] + ; CHECK-DAG: std [[EXT]] store volatile i32 %D, i32* %P ; Reuse low bits of extended register, don't extend live range of SUM. - ; CHECK: stw [[EXT]] + ; CHECK-DAG: stw [[SUM]] %R = add i32 %D, %D ret i32 %R } diff --git a/llvm/test/CodeGen/PowerPC/i1-to-double.ll b/llvm/test/CodeGen/PowerPC/i1-to-double.ll index 43d7c18..88d6a03 100644 --- a/llvm/test/CodeGen/PowerPC/i1-to-double.ll +++ b/llvm/test/CodeGen/PowerPC/i1-to-double.ll @@ -5,15 +5,13 @@ define double @test(i1 %X) { } ; CHECK-LABEL: @test - -; CHECK: addis 4, 4, .LCPI -; CHECK-NEXT: addis 5, 5, .LCPI -; CHECK: andi. {{[0-9]+}}, 3, 1 +; CHECK-DAG: addis 3, 4, .LCPI +; CHECK-DAG: addis 4, 4, .LCPI +; CHECK-DAG: andi. {{[0-9]+}}, 3, 1 ; CHECK-NEXT: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK: ori 3, 4, 0 ; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] ; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi 3, 5, 0 +; CHECK-NEXT: addi 3, 4, 0 ; CHECK-NEXT: [[SUCCESSOR]] ; CHECK-NEXT: lfs 1, 0(3) ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/lsa.ll b/llvm/test/CodeGen/PowerPC/lsa.ll index d0ebd47..c2bfb21 100644 --- a/llvm/test/CodeGen/PowerPC/lsa.ll +++ b/llvm/test/CodeGen/PowerPC/lsa.ll @@ -23,8 +23,8 @@ entry: ; CHECK: @foo ; CHECK-NOT: lwzx -; CHECK: lwz {{[0-9]+}}, 4([[REG:[0-9]+]]) -; CHECK: lwz {{[0-9]+}}, 0([[REG]]) +; CHECK: lwz {{[0-9]+}}, 0([[REG:[0-9]+]]) +; CHECK: lwz {{[0-9]+}}, 4([[REG]]) ; CHECK: blr %add = add nsw i32 %4, %3 diff --git a/llvm/test/CodeGen/PowerPC/ppc32-vacopy.ll b/llvm/test/CodeGen/PowerPC/ppc32-vacopy.ll index 79c40e3..c68881c 100644 --- a/llvm/test/CodeGen/PowerPC/ppc32-vacopy.ll +++ b/llvm/test/CodeGen/PowerPC/ppc32-vacopy.ll @@ -16,9 +16,10 @@ entry: ret void } ; CHECK: test_vacopy: -; CHECK: lwz [[REG1:[0-9]+]], {{.*}} -; CHECK: lwz [[REG2:[0-9]+]], {{.*}} -; CHECK: lwz [[REG3:[0-9]+]], {{.*}} -; CHECK: stw [[REG1]], {{.*}} -; CHECK: stw [[REG2]], {{.*}} -; CHECK: stw [[REG3]], {{.*}} +; CHECK-DAG: lwz [[REG1:[0-9]+]], {{.*}} +; CHECK-DAG: lwz [[REG2:[0-9]+]], {{.*}} +; CHECK-DAG: lwz [[REG3:[0-9]+]], {{.*}} +; CHECK-DAG: stw [[REG1]], {{.*}} +; CHECK-DAG: stw [[REG2]], {{.*}} +; CHECK-DAG: stw [[REG3]], {{.*}} +; CHECK: blr diff --git a/llvm/test/CodeGen/PowerPC/ppc64-gep-opt.ll b/llvm/test/CodeGen/PowerPC/ppc64-gep-opt.ll index d1ae1bc..ec2bebb 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-gep-opt.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-gep-opt.ll @@ -1,5 +1,4 @@ ; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -O3 -print-after=codegenprepare -mcpu=ppc64 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s ; RUN: llc -verify-machineinstrs -O3 -print-after=codegenprepare -mcpu=pwr7 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -29,18 +28,6 @@ if.end: ; preds = %if.then, %entry ret void } -; CHECK-NoAA-LABEL: @test_GEP_CSE( -; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64 -; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 -; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]] -; CHECK-NoAA: add i64 [[PTR2]], 23052 -; CHECK-NoAA: inttoptr -; CHECK-NoAA: if.then: -; CHECK-NoAA-NOT: ptrtoint -; CHECK-NoAA-NOT: mul -; CHECK-NoAA: add i64 [[PTR2]], 23048 -; CHECK-NoAA: inttoptr - ; CHECK-UseAA-LABEL: @test_GEP_CSE( ; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8* ; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 @@ -80,14 +67,6 @@ exit: ; CHECK-NOT: lwzu ; CHECK: blr -; CHECK-NoAA-LABEL: test_GEP_across_BB( -; CHECK-NoAA: add i64 [[TMP:%[a-zA-Z0-9]+]], 528 -; CHECK-NoAA: add i64 [[TMP]], 532 -; CHECK-NoAA: if.true: -; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* {{.*}}, i64 532 -; CHECK-NoAA: exit: -; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* {{.*}}, i64 528 - ; CHECK-UseAA-LABEL: test_GEP_across_BB( ; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr ; CHECK-UseAA: getelementptr i8, i8* [[PTR0]], i64 528 @@ -112,10 +91,6 @@ entry: %p = getelementptr [1024 x %struct.S], [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1 ret double* %p } -; CHECK-NoAA-LABEL: @test-struct_1( -; CHECK-NoAA-NOT: getelementptr -; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88 - ; CHECK-UseAA-LABEL: @test-struct_1( ; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 88 @@ -134,10 +109,6 @@ entry: %ptr2 = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1 ret %struct2* %ptr2 } -; CHECK-NoAA-LABEL: @test-struct_2( -; CHECK-NoAA-NOT: = getelementptr -; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40 - ; CHECK-UseAA-LABEL: @test-struct_2( ; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 -40 diff --git a/llvm/test/CodeGen/PowerPC/save-cr-ppc32svr4.ll b/llvm/test/CodeGen/PowerPC/save-cr-ppc32svr4.ll index 0bd4501..a739a02 100644 --- a/llvm/test/CodeGen/PowerPC/save-cr-ppc32svr4.ll +++ b/llvm/test/CodeGen/PowerPC/save-cr-ppc32svr4.ll @@ -3,12 +3,12 @@ ; Make sure that the CR register is saved correctly on PPC32/SVR4. ; CHECK-LABEL: fred: -; CHECK: stwu 1, -32(1) -; CHECK: stw 31, 28(1) +; CHECK: stwu 1, -48(1) +; CHECK: stw 31, 36(1) ; CHECK: mr 31, 1 -; CHECK-DAG: stw 30, 24(1) +; CHECK-DAG: stw 30, 32(1) ; CHECK-DAG: mfcr [[CR:[0-9]+]] -; CHECK: stw [[CR]], 20(31) +; CHECK: stw [[CR]], 28(31) target datalayout = "E-m:e-p:32:32-i64:64-n32" target triple = "powerpc-unknown-freebsd" @@ -16,7 +16,8 @@ target triple = "powerpc-unknown-freebsd" ; Function Attrs: norecurse nounwind readnone sspstrong define i64 @fred(double %a0) local_unnamed_addr #0 { b1: - %v2 = fcmp olt double %a0, 0x43E0000000000000 + %a1 = tail call double asm "fadd $0, $1, $2", "=f,f,f,~{cr2}"(double %a0, double %a0) + %v2 = fcmp olt double %a1, 0x43E0000000000000 br i1 %v2, label %b3, label %b7 b3: ; preds = %b1 diff --git a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll index a42ae66..930d53d 100644 --- a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll +++ b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll @@ -3,18 +3,18 @@ ; CHECK-LABEL: fred ; CHECK: stwux 1, 1, 0 ; Save R31..R29 via R0: -; CHECK: addic 0, 0, -4 +; CHECK: addic 0, 0, -12 ; CHECK: stwx 31, 0, 0 ; CHECK: addic 0, 0, -4 ; CHECK: stwx 30, 0, 0 ; CHECK: addic 0, 0, -4 ; CHECK: stwx 29, 0, 0 ; Set R29 back to the value of R0 from before the updates: -; CHECK: addic 29, 0, 12 +; CHECK: addic 29, 0, 20 ; Save CR through R12 using R29 as the stack pointer (aligned base pointer). ; CHECK: mfcr 12 -; CHECK: stw 28, -16(29) -; CHECK: stw 12, -20(29) +; CHECK: stw 28, -24(29) +; CHECK: stw 12, -28(29) target datalayout = "E-m:e-p:32:32-i64:64-n32" target triple = "powerpc-unknown-freebsd" @@ -24,7 +24,8 @@ define i64 @fred(double %a0) local_unnamed_addr #0 { b1: %v2 = alloca i64, align 128 store i64 0, i64* %v2 - %v3 = fcmp olt double %a0, 0x43E0000000000000 + %a1 = tail call double asm "fadd $0, $1, $2", "=f,f,f,~{cr2}"(double %a0, double %a0) + %v3 = fcmp olt double %a1, 0x43E0000000000000 br i1 %v3, label %b4, label %b8 b4: ; preds = %b1 diff --git a/llvm/test/CodeGen/PowerPC/tls.ll b/llvm/test/CodeGen/PowerPC/tls.ll index 72e731c..8410e98 100644 --- a/llvm/test/CodeGen/PowerPC/tls.ll +++ b/llvm/test/CodeGen/PowerPC/tls.ll @@ -16,7 +16,7 @@ entry: ;OPT0: stw [[REG2]], 0([[REG1]]) ;OPT1: addis [[REG1:[0-9]+]], 13, a@tprel@ha ;OPT1-NEXT: li [[REG2:[0-9]+]], 42 -;OPT1-NEXT: stw [[REG2]], a@tprel@l([[REG1]]) +;OPT1: stw [[REG2]], a@tprel@l([[REG1]]) store i32 42, i32* @a, align 4 ret i32 0 } diff --git a/llvm/test/CodeGen/PowerPC/unal-altivec.ll b/llvm/test/CodeGen/PowerPC/unal-altivec.ll index cdb1604..a804b35 100644 --- a/llvm/test/CodeGen/PowerPC/unal-altivec.ll +++ b/llvm/test/CodeGen/PowerPC/unal-altivec.ll @@ -30,21 +30,14 @@ vector.body: ; preds = %vector.body, %vecto ; CHECK: @foo ; CHECK-DAG: li [[C0:[0-9]+]], 0 -; CHECK-DAG: li [[C15:[0-9]+]], 15 ; CHECK-DAG: lvx [[CNST:[0-9]+]], ; CHECK: .LBB0_1: ; CHECK-DAG: lvsl [[MASK1:[0-9]+]], [[B1:[0-9]+]], [[C0]] -; CHECK-DAG: lvsl [[MASK2:[0-9]+]], [[B2:[0-9]+]], [[C0]] ; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[C0]] -; CHECK-DAG: add [[B4:[0-9]+]], [[B2]], [[C0]] ; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[C0]] -; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], [[C15]] -; CHECK-DAG: lvx [[LD3:[0-9]+]], [[B2]], [[C0]] -; CHECK-DAG: lvx [[LD4:[0-9]+]], [[B4]], [[C15]] +; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], ; CHECK-DAG: vperm [[R1:[0-9]+]], [[LD1]], [[LD2]], [[MASK1]] -; CHECK-DAG: vperm [[R2:[0-9]+]], [[LD3]], [[LD4]], [[MASK2]] ; CHECK-DAG: vaddfp {{[0-9]+}}, [[R1]], [[CNST]] -; CHECK-DAG: vaddfp {{[0-9]+}}, [[R2]], [[CNST]] ; CHECK: blr for.end: ; preds = %vector.body -- 2.7.4