[GlobalMerge] Take into account minsize on Global users' parents.

author Ahmed Bougacha <ahmed.bougacha@gmail.com>
Thu, 4 Jun 2015 20:39:23 +0000 (20:39 +0000)

committer Ahmed Bougacha <ahmed.bougacha@gmail.com>
Thu, 4 Jun 2015 20:39:23 +0000 (20:39 +0000)
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h

index 293ceb1..481d4fc 100644 (file)
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -152,7 +152,14 @@ Pass *createLoopInterchangePass();
  //
  Pass *createLoopStrengthReducePass();
  
-Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset);
+//===----------------------------------------------------------------------===//
+//
+// GlobalMerge - This pass merges internal (by default) globals into structs
+// to enable reuse of a base pointer by indexed addressing modes.
+// It can also be configured to focus on size optimizations only.
+//
+Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
+                            bool OnlyOptimizeForSize = false);
  
  //===----------------------------------------------------------------------===//
  //
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp

index 79de175..df54a9c 100644 (file)
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -124,6 +124,12 @@ namespace {
      // for more information.
      unsigned MaxOffset;
  
+    /// Whether we should try to optimize for size only.
+    /// Currently, this applies a dead simple heuristic: only consider globals
+    /// used in minsize functions for merging.
+    /// FIXME: This could learn about optsize, and be used in the cost model.
+    bool OnlyOptimizeForSize;
+
      bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
                   Module &M, bool isConst, unsigned AddrSpace) const;
      /// \brief Merge everything in \p Globals for which the corresponding bit
@@ -152,9 +158,10 @@ namespace {
    public:
      static char ID;             // Pass identification, replacement for typeid.
      explicit GlobalMerge(const TargetMachine *TM = nullptr,
-                         unsigned MaximalOffset = 0)
+                         unsigned MaximalOffset = 0,
+                         bool OnlyOptimizeForSize = false)
          : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()),
-          MaxOffset(MaximalOffset) {
+          MaxOffset(MaximalOffset), OnlyOptimizeForSize(OnlyOptimizeForSize) {
        initializeGlobalMergePass(*PassRegistry::getPassRegistry());
      }
  
@@ -290,6 +297,12 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
            continue;
  
          Function *ParentFn = I->getParent()->getParent();
+
+        // If we're only optimizing for size, ignore non-minsize functions.
+        if (OnlyOptimizeForSize &&
+            !ParentFn->hasFnAttribute(Attribute::MinSize))
+          continue;
+
          size_t UGSIdx = GlobalUsesByFunction[ParentFn];
  
          // If this is the first global the basic block uses, map it to the set
@@ -585,6 +598,7 @@ bool GlobalMerge::doFinalization(Module &M) {
    return false;
  }
  
-Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) {
-  return new GlobalMerge(TM, Offset);
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
+                                  bool OnlyOptimizeForSize) {
+  return new GlobalMerge(TM, Offset, OnlyOptimizeForSize);
  }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

index a9059ab..f23dd33 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -250,10 +250,14 @@ bool AArch64PassConfig::addPreISel() {
    // FIXME: On AArch64, this depends on the type.
    // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
    // and the offset has to be a multiple of the related size in bytes.
-  if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+  if ((TM->getOptLevel() != CodeGenOpt::None &&
         EnableGlobalMerge == cl::BOU_UNSET) ||
-      EnableGlobalMerge == cl::BOU_TRUE)
-    addPass(createGlobalMergePass(TM, 4095));
+      EnableGlobalMerge == cl::BOU_TRUE) {
+    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
+                               (EnableGlobalMerge == cl::BOU_UNSET);
+    addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
+  }
+
    if (TM->getOptLevel() != CodeGenOpt::None)
      addPass(createAArch64AddressTypePromotionPass());
  
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp

index e794fb7..0f98d52 100644 (file)
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -339,15 +339,18 @@ void ARMPassConfig::addIRPasses() {
  }
  
  bool ARMPassConfig::addPreISel() {
-  if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+  if ((TM->getOptLevel() != CodeGenOpt::None &&
         EnableGlobalMerge == cl::BOU_UNSET) ||
-      EnableGlobalMerge == cl::BOU_TRUE)
+      EnableGlobalMerge == cl::BOU_TRUE) {
      // FIXME: This is using the thumb1 only constant value for
      // maximal global offset for merging globals. We may want
      // to look into using the old value for non-thumb1 code of
      // 4095 based on the TargetMachine, but this starts to become
      // tricky when doing code gen per function.
-    addPass(createGlobalMergePass(TM, 127));
+    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
+                               (EnableGlobalMerge == cl::BOU_UNSET);
+    addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize));
+  }
  
    return false;
  }
diff --git a/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll

new file mode 100644 (file)

index 0000000..e83cbab
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
@@ -0,0 +1,74 @@
+; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false -aarch64-collect-loh=false \
+; RUN:   -O1 -global-merge-group-by-use -global-merge-ignore-single-use \
+; RUN:   %s -o - | FileCheck %s
+
+; Check that, at -O1, we only merge globals used in minsize functions.
+; We assume that globals of the same size aren't reordered inside a set.
+; We use -global-merge-ignore-single-use, and thus only expect one merged set.
+
+@m1 = internal global i32 0, align 4
+@n1 = internal global i32 0, align 4
+
+; CHECK-LABEL: f1:
+define void @f1(i32 %a1, i32 %a2) minsize nounwind {
+; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: ret
+  store i32 %a1, i32* @m1, align 4
+  store i32 %a2, i32* @n1, align 4
+  ret void
+}
+
+@m2 = internal global i32 0, align 4
+@n2 = internal global i32 0, align 4
+
+; CHECK-LABEL: f2:
+define void @f2(i32 %a1, i32 %a2) nounwind {
+; CHECK-NEXT: adrp x8, _m2@PAGE
+; CHECK-NEXT: adrp x9, _n2@PAGE
+; CHECK-NEXT: str w0, [x8, _m2@PAGEOFF]
+; CHECK-NEXT: str w1, [x9, _n2@PAGEOFF]
+; CHECK-NEXT: ret
+  store i32 %a1, i32* @m2, align 4
+  store i32 %a2, i32* @n2, align 4
+  ret void
+}
+
+; If we have use sets partially overlapping between a minsize and a non-minsize
+; function, explicitly check that we only consider the globals used in the
+; minsize function for merging.
+
+@m3 = internal global i32 0, align 4
+@n3 = internal global i32 0, align 4
+
+; CHECK-LABEL: f3:
+define void @f3(i32 %a1, i32 %a2) minsize nounwind {
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #8]
+; CHECK-NEXT: ret
+  store i32 %a1, i32* @m3, align 4
+  store i32 %a2, i32* @n3, align 4
+  ret void
+}
+
+@n4 = internal global i32 0, align 4
+
+; CHECK-LABEL: f4:
+define void @f4(i32 %a1, i32 %a2) nounwind {
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: adrp x9, _n4@PAGE
+; CHECK-NEXT: str w0, [x8, #8]
+; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF]
+; CHECK-NEXT: ret
+  store i32 %a1, i32* @m3, align 4
+  store i32 %a2, i32* @n4, align 4
+  ret void
+}
+
+; CHECK-DAG: .zerofill __DATA,__bss,[[SET]],16,3
+; CHECK-DAG: .zerofill __DATA,__bss,_m2,4,2
+; CHECK-DAG: .zerofill __DATA,__bss,_n2,4,2
+; CHECK-DAG: .zerofill __DATA,__bss,_n4,4,2
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Thu, 4 Jun 2015 20:39:23 +0000 (20:39 +0000)
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Thu, 4 Jun 2015 20:39:23 +0000 (20:39 +0000)
llvm/include/llvm/Transforms/Scalar.h		patch | blob | history
llvm/lib/CodeGen/GlobalMerge.cpp		patch | blob | history
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp		patch | blob | history
llvm/lib/Target/ARM/ARMTargetMachine.cpp		patch | blob | history
llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll	[new file with mode: 0644]	patch | blob