[PowerPC] Use 16-byte alignment for modern cores for functions/loops

author Hal Finkel <hfinkel@anl.gov>
Sat, 3 Jan 2015 14:58:25 +0000 (14:58 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Sat, 3 Jan 2015 14:58:25 +0000 (14:58 +0000)
diff --git a/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp

index 41594be..940d55a 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -70,12 +70,37 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
    Fn.RenumberBlocks();
    BlockSizes.resize(Fn.getNumBlockIDs());
  
+  auto GetAlignmentAdjustment =
+    [TII](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
+    unsigned Align = MBB.getAlignment();
+    if (!Align)
+      return 0;
+
+    unsigned AlignAmt = 1 << Align;
+    unsigned ParentAlign = MBB.getParent()->getAlignment();
+
+    if (Align <= ParentAlign)
+      return OffsetToAlignment(Offset, AlignAmt);
+
+    // The alignment of this MBB is larger than the function's alignment, so we
+    // can't tell whether or not it will insert nops. Assume that it will.
+    return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+  };
+
    // Measure each MBB and compute a size for the entire function.
    unsigned FuncSize = 0;
    for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
         ++MFI) {
      MachineBasicBlock *MBB = MFI;
  
+    // The end of the previous block may have extra nops if this block has an
+    // alignment requirement.
+    if (MBB->getNumber() > 0) {
+      unsigned AlignExtra = GetAlignmentAdjustment(*MBB, FuncSize);
+      BlockSizes[MBB->getNumber()-1] += AlignExtra;
+      FuncSize += AlignExtra;
+    }
+
      unsigned BlockSize = 0;
      for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
           MBBI != EE; ++MBBI)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

index 32f958e..8d8c322 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -679,6 +679,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
    if (Subtarget.isDarwin())
      setPrefFunctionAlignment(4);
  
+  switch (Subtarget.getDarwinDirective()) {
+  default: break;
+  case PPC::DIR_970:
+  case PPC::DIR_A2:
+  case PPC::DIR_E500mc:
+  case PPC::DIR_E5500:
+  case PPC::DIR_PWR4:
+  case PPC::DIR_PWR5:
+  case PPC::DIR_PWR5X:
+  case PPC::DIR_PWR6:
+  case PPC::DIR_PWR6X:
+  case PPC::DIR_PWR7:
+  case PPC::DIR_PWR8:
+    setPrefFunctionAlignment(4);
+    setPrefLoopAlignment(4);
+    break;
+  }
+
    setInsertFencesForAtomic(true);
  
    if (Subtarget.enableMachineScheduler())
@@ -688,8 +706,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
  
    computeRegisterProperties();
  
-  // The Freescale cores does better with aggressive inlining of memcpy and
-  // friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
+  // The Freescale cores do better with aggressive inlining of memcpy and
+  // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
    if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
        Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
      MaxStoresPerMemset = 32;
@@ -698,8 +716,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
      MaxStoresPerMemcpyOptSize = 8;
      MaxStoresPerMemmove = 32;
      MaxStoresPerMemmoveOptSize = 8;
-
-    setPrefFunctionAlignment(4);
    }
  }
  
diff --git a/llvm/test/CodeGen/PowerPC/code-align.ll b/llvm/test/CodeGen/PowerPC/code-align.ll

new file mode 100644 (file)

index 0000000..5550547
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/code-align.ll
@@ -0,0 +1,65 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s -check-prefix=GENERIC
+; RUN: llc -mcpu=970 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=e500mc < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=e5500 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=pwr4 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=pwr5 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=pwr5x < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=pwr6 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=pwr6x < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=pwr8 < %s | FileCheck %s -check-prefix=BASIC
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo(i32 signext %x) #0 {
+entry:
+  %mul = shl nsw i32 %x, 1
+  ret i32 %mul
+
+; GENERIC-LABEL: .globl  foo
+; BASIC-LABEL: .globl  foo
+; GENERIC: .align  2
+; BASIC: .align  4
+; GENERIC: @foo
+; BASIC: @foo
+}
+
+; Function Attrs: nounwind
+define void @loop(i32 signext %x, i32* nocapture %a) #1 {
+entry:
+  br label %vector.body
+
+; GENERIC-LABEL: @loop
+; BASIC-LABEL: @loop
+; GENERIC: mtctr
+; BASIC: mtctr
+; GENERIC-NOT: .align
+; BASIC: .align  4
+; GENERIC: bdnz
+; BASIC: bdnz
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %induction45 = or i64 %index, 1
+  %0 = getelementptr inbounds i32* %a, i64 %index
+  %1 = getelementptr inbounds i32* %a, i64 %induction45
+  %2 = load i32* %0, align 4
+  %3 = load i32* %1, align 4
+  %4 = add nsw i32 %2, 4
+  %5 = add nsw i32 %3, 4
+  store i32 %4, i32* %0, align 4
+  store i32 %5, i32* %1, align 4
+  %index.next = add i64 %index, 2
+  %6 = icmp eq i64 %index.next, 2048
+  br i1 %6, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
author	Hal Finkel <hfinkel@anl.gov>
	Sat, 3 Jan 2015 14:58:25 +0000 (14:58 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Sat, 3 Jan 2015 14:58:25 +0000 (14:58 +0000)
llvm/lib/Target/PowerPC/PPCBranchSelector.cpp		patch | blob | history
llvm/lib/Target/PowerPC/PPCISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/PowerPC/code-align.ll	[new file with mode: 0644]	patch | blob