[LoopVectorizer, TTI] New method supportsEfficientVectorElementLoadStore()

author Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Wed, 12 Apr 2017 12:41:37 +0000 (12:41 +0000)

committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Wed, 12 Apr 2017 12:41:37 +0000 (12:41 +0000)
author Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Wed, 12 Apr 2017 12:41:37 +0000 (12:41 +0000)
committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Wed, 12 Apr 2017 12:41:37 +0000 (12:41 +0000)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h

index 3d92208..6719668 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -437,6 +437,11 @@ public:
    unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                              unsigned VF) const;
  
+  /// If the target has efficient vector element load/store instructions, it
+  /// can return true here so that insertion/extraction costs are not added to
+  /// the scalarization cost of a load/store.
+  bool supportsEfficientVectorElementLoadStore() const;
+
    /// \brief Don't restrict interleaved unrolling to small loops.
    bool enableAggressiveInterleaving(bool LoopHasReductions) const;
  
@@ -790,6 +795,7 @@ public:
    getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
    virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                      unsigned VF) = 0;
+  virtual bool supportsEfficientVectorElementLoadStore() = 0;
    virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
    virtual bool enableInterleavedAccessVectorization() = 0;
    virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -996,6 +1002,10 @@ public:
      return Impl.getOperandsScalarizationOverhead(Args, VF);
    }
  
+  bool supportsEfficientVectorElementLoadStore() override {
+    return Impl.supportsEfficientVectorElementLoadStore();
+  }
+
    bool enableAggressiveInterleaving(bool LoopHasReductions) override {
      return Impl.enableAggressiveInterleaving(LoopHasReductions);
    }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

index 790acbc..9ab6b74 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -262,6 +262,8 @@ public:
    unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                              unsigned VF) { return 0; }
  
+  bool supportsEfficientVectorElementLoadStore() { return false; }
+
    bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
  
    bool enableInterleavedAccessVectorization() { return false; }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp

index c8b8740..d73b1a1 100644 (file)
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -197,6 +197,10 @@ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
    return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
  }
  
+bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
+  return TTIImpl->supportsEfficientVectorElementLoadStore();
+}
+
  bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
    return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
  }
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h

index d2639cb..3766ed4 100644 (file)
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -55,6 +55,7 @@ public:
    unsigned getNumberOfRegisters(bool Vector);
    unsigned getRegisterBitWidth(bool Vector);
  
+  bool supportsEfficientVectorElementLoadStore() { return true; }
    bool enableInterleavedAccessVectorization() { return true; }
  
    int getArithmeticInstrCost(
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index f891cd9..26bcbcb 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3663,13 +3663,17 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
  
    unsigned Cost = 0;
    Type *RetTy = ToVectorTy(I->getType(), VF);
-  if (!RetTy->isVoidTy())
+  if (!RetTy->isVoidTy() &&
+      (!isa<LoadInst>(I) ||
+       !TTI.supportsEfficientVectorElementLoadStore()))
      Cost += TTI.getScalarizationOverhead(RetTy, true, false);
  
    if (CallInst *CI = dyn_cast<CallInst>(I)) {
      SmallVector<const Value *, 4> Operands(CI->arg_operands());
      Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  } else {
+  }
+  else if (!isa<StoreInst>(I) ||
+           !TTI.supportsEfficientVectorElementLoadStore()) {
      SmallVector<const Value *, 4> Operands(I->operand_values());
      Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
    }
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll

new file mode 100644 (file)

index 0000000..e7096c2
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
@@ -0,0 +1,33 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
+; RUN:   -force-vector-width=4 -debug-only=loop-vectorize \
+; RUN:   -disable-output -enable-interleaved-mem-accesses=false < %s 2>&1 | \
+; RUN:   FileCheck %s
+;
+; Check that a scalarized load/store does not get a cost for inserts/
+; extracts, since z13 supports element load/store.
+
+define void @fun(i32* %data, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
+  %tmp1 = load i32, i32* %tmp0, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp0, align 4
+  %i.next = add nuw nsw i64 %i, 2
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   store i32 %tmp2, i32* %tmp0, align 4
+
+; CHECK: LV: Scalarizing:  %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Scalarizing:  store i32 %tmp2, i32* %tmp0, align 4
+}
+
author	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Wed, 12 Apr 2017 12:41:37 +0000 (12:41 +0000)
committer	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Wed, 12 Apr 2017 12:41:37 +0000 (12:41 +0000)
llvm/include/llvm/Analysis/TargetTransformInfo.h		patch \| blob \| history
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h		patch \| blob \| history
llvm/lib/Analysis/TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h		patch \| blob \| history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll	[new file with mode: 0644]	patch \| blob