[LoopVectorize][AArch64] Enable ordered reductions by default for AArch64

author David Sherwood <david.sherwood@arm.com>

Fri, 2 Jul 2021 10:12:16 +0000 (11:12 +0100)

committer David Sherwood <david.sherwood@arm.com>

Thu, 19 Aug 2021 08:29:40 +0000 (09:29 +0100)
author David Sherwood <david.sherwood@arm.com>
Fri, 2 Jul 2021 10:12:16 +0000 (11:12 +0100)
committer David Sherwood <david.sherwood@arm.com>
Thu, 19 Aug 2021 08:29:40 +0000 (09:29 +0100)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h

index dd5a75f..9b87231 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -662,6 +662,9 @@ public:
    /// Return true if the target supports masked expand load.
    bool isLegalMaskedExpandLoad(Type *DataType) const;
  
+  /// Return true if we should be enabling ordered reductions for the target.
+  bool enableOrderedReductions() const;
+
    /// Return true if the target has a unified operation to calculate division
    /// and remainder. If so, the additional implicit multiplication and
    /// subtraction required to calculate a remainder from division are free. This
@@ -1508,6 +1511,7 @@ public:
    virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
    virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
    virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
+  virtual bool enableOrderedReductions() = 0;
    virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
    virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
    virtual bool prefersVectorizedAddressing() = 0;
@@ -1890,6 +1894,9 @@ public:
    bool isLegalMaskedExpandLoad(Type *DataType) override {
      return Impl.isLegalMaskedExpandLoad(DataType);
    }
+  bool enableOrderedReductions() override {
+    return Impl.enableOrderedReductions();
+  }
    bool hasDivRemOp(Type *DataType, bool IsSigned) override {
      return Impl.hasDivRemOp(DataType, IsSigned);
    }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

index 4151cb1..0e92518 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -263,6 +263,8 @@ public:
  
    bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
  
+  bool enableOrderedReductions() const { return false; }
+
    bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
  
    bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp

index 951f7d3..c299186 100644 (file)
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -410,6 +410,10 @@ bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
    return TTIImpl->isLegalMaskedExpandLoad(DataType);
  }
  
+bool TargetTransformInfo::enableOrderedReductions() const {
+  return TTIImpl->enableOrderedReductions();
+}
+
  bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
    return TTIImpl->hasDivRemOp(DataType, IsSigned);
  }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

index 5c09504..5ef3931 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -299,6 +299,8 @@ public:
      return BaseT::isLegalNTStore(DataType, Alignment);
    }
  
+  bool enableOrderedReductions() const { return true; }
+
    InstructionCost getInterleavedMemoryOpCost(
        unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
        Align Alignment, unsigned AddressSpace,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 00416ef..611525b 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -331,7 +331,7 @@ static cl::opt<bool>
                             cl::desc("Prefer in-loop vector reductions, "
                                      "overriding the targets preference."));
  
-cl::opt<bool> ForceOrderedReductions(
+static cl::opt<bool> ForceOrderedReductions(
      "force-ordered-reductions", cl::init(false), cl::Hidden,
      cl::desc("Enable the vectorisation of loops with in-order (strict) "
               "FP reductions"));
@@ -1317,8 +1317,7 @@ public:
    /// the IsOrdered flag of RdxDesc is set and we do not allow reordering
    /// of FP operations.
    bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) {
-    return ForceOrderedReductions && !Hints->allowReordering() &&
-           RdxDesc.isOrdered();
+    return !Hints->allowReordering() && RdxDesc.isOrdered();
    }
  
    /// \returns The smallest bitwidth each instruction can be represented with.
@@ -10225,7 +10224,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
      return false;
    }
  
-  if (!LVL.canVectorizeFPMath(ForceOrderedReductions)) {
+  bool AllowOrderedReductions;
+  // If the flag is set, use that instead and override the TTI behaviour.
+  if (ForceOrderedReductions.getNumOccurrences() > 0)
+    AllowOrderedReductions = ForceOrderedReductions;
+  else
+    AllowOrderedReductions = TTI->enableOrderedReductions();
+  if (!LVL.canVectorizeFPMath(AllowOrderedReductions)) {
      ORE->emit([&]() {
        auto *ExactFPMathInst = Requirements.getExactFPInst();
        return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps",
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

index cba948e..eb78fe1 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -2,7 +2,7 @@
  ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
  ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true  -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
  ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true  -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
-; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
+; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
  
  define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
  ; CHECK-ORDERED-LABEL: @fadd_strict
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll

index 0722ac3..501a151 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -2,7 +2,7 @@
  ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=false -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
  ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true  -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
  ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true  -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
-; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
+; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
  
  define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
  ; CHECK-ORDERED-LABEL: @fadd_strict
author	David Sherwood <david.sherwood@arm.com>
	Fri, 2 Jul 2021 10:12:16 +0000 (11:12 +0100)
committer	David Sherwood <david.sherwood@arm.com>
	Thu, 19 Aug 2021 08:29:40 +0000 (09:29 +0100)
llvm/include/llvm/Analysis/TargetTransformInfo.h		patch | blob | history
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h		patch | blob | history
llvm/lib/Analysis/TargetTransformInfo.cpp		patch | blob | history
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h		patch | blob | history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch | blob | history
llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll		patch | blob | history
llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll		patch | blob | history