[Hexagon] Rudimentary support for auto-vectorization for HVX

author Krzysztof Parzyszek <kparzysz@codeaurora.org>

Tue, 27 Mar 2018 17:07:52 +0000 (17:07 +0000)

committer Krzysztof Parzyszek <kparzysz@codeaurora.org>

Tue, 27 Mar 2018 17:07:52 +0000 (17:07 +0000)
author Krzysztof Parzyszek <kparzysz@codeaurora.org>
Tue, 27 Mar 2018 17:07:52 +0000 (17:07 +0000)
committer Krzysztof Parzyszek <kparzysz@codeaurora.org>
Tue, 27 Mar 2018 17:07:52 +0000 (17:07 +0000)
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

index 4feaca3..3290b59 100644 (file)
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -26,6 +26,9 @@ using namespace llvm;
  
  #define DEBUG_TYPE "hexagontti"
  
+static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),  // Hidden option, initialized to false.
+  cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));  // Checked by getNumberOfRegisters and getMaxInterleaveFactor in this file.
+
  static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
    cl::init(true), cl::Hidden,
    cl::desc("Control lookup table emission on Hexagon target"));
@@ -47,8 +50,41 @@ bool HexagonTTIImpl::shouldFavorPostInc() const {
    return true;
  }
  
-unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
-  return vector ? 0 : 32;
+unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {  // Register count for the vector (Vector == true) or scalar class.
+  if (Vector)
+    return HexagonAutoHVX && getST()->useHVXOps() ? 32 : 0;  // 32 only if the hexagon-autohvx option is set and useHVXOps() holds; otherwise 0.
+  return 32;  // Scalar case: always 32.
+}
+
+unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {  // VF is not used in the computation.
+  return HexagonAutoHVX && getST()->useHVXOps() ? 64 : 0;  // 64 under the same condition as the vector register count, else 0. NOTE(review): 0 as a maximum factor looks unusual - confirm callers tolerate it.
+}
+
+unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {  // Register width for the vector or scalar class.
+  return Vector ? getMinVectorRegisterBitWidth() : 32;  // Vector: defers to getMinVectorRegisterBitWidth(); scalar: 32.
+}
+
+unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {  // Does not consult the hexagon-autohvx option, only useHVXOps().
+  return getST()->useHVXOps() ? getST()->getVectorLength()*8 : 0;  // getVectorLength()*8 with HVX ops, else 0; presumably the length is in bytes - TODO confirm.
+}
+
+unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,  // Memory-op cost; only wide vector loads are special-cased here.
+      unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
+  if (Opcode == Instruction::Load && Src->isVectorTy()) {  // Stores and non-vector loads go straight to BaseT below.
+    VectorType *VecTy = cast<VectorType>(Src);
+    unsigned VecWidth = VecTy->getBitWidth();
+    if (VecWidth > 64) {  // Vector loads of 64 bits or fewer also fall through to BaseT.
+      // Assume that vectors longer than 64 bits are meant for HVX.
+      if (getNumberOfRegisters(true) > 0) {  // Nonzero only when the option and useHVXOps() both hold.
+        if (VecWidth % getRegisterBitWidth(true) == 0)  // Width is a whole multiple of the vector register width.
+          return 1;
+      }
+      unsigned AlignWidth = 8 * std::max(1u, Alignment);  // Alignment of 0 is treated as 1; presumably converts bytes to bits - TODO confirm.
+      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;  // VecWidth rounded up to a multiple of AlignWidth, counted in AlignWidth units.
+      return 3*NumLoads;  // Flat factor of 3 per counted load.
+    }
+  }
+  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);  // Default cost for every other case.
  }
  
  unsigned HexagonTTIImpl::getPrefetchDistance() const {
@@ -61,21 +97,22 @@ unsigned HexagonTTIImpl::getCacheLineSize() const {
  
  int HexagonTTIImpl::getUserCost(const User *U,
                                  ArrayRef<const Value *> Operands) {
-  auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool {
+  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
      if (!CI->isIntegerCast())
        return false;
+    // Only extensions from an integer type shorter than 32-bit to i32
+    // can be folded into the load.
+    const DataLayout &DL = getDataLayout();
+    unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
+    unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
+    if (DBW != 32 || SBW >= DBW)
+      return false;
+
      const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
      // Technically, this code could allow multiple uses of the load, and
      // check if all the uses are the same extension operation, but this
      // should be sufficient for most cases.
-    if (!LI || !LI->hasOneUse())
-      return false;
-
-    // Only extensions from an integer type shorter than 32-bit to i32
-    // can be folded into the load.
-    unsigned SBW = CI->getSrcTy()->getIntegerBitWidth();
-    unsigned DBW = CI->getDestTy()->getIntegerBitWidth();
-    return DBW == 32 && (SBW < DBW);
+    return LI && LI->hasOneUse();
    };
  
    if (const CastInst *CI = dyn_cast<const CastInst>(U))
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h

index 58e23b2..7adacaf 100644 (file)
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -70,6 +70,113 @@ public:
    /// @{
  
    unsigned getNumberOfRegisters(bool vector) const;
+  unsigned getMaxInterleaveFactor(unsigned VF);  // Defined out of line in HexagonTargetTransformInfo.cpp.
+  unsigned getRegisterBitWidth(bool Vector) const;  // Defined out of line in HexagonTargetTransformInfo.cpp.
+  unsigned getMinVectorRegisterBitWidth() const;  // Defined out of line in HexagonTargetTransformInfo.cpp.
+
+  bool supportsEfficientVectorElementLoadStore() {  // Unconditionally answers false.
+    return false;
+  }
+
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {  // All three parameters are ignored.
+    return 0;  // Constant overhead of 0.
+  }
+
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,  // Args and VF are ignored.
+                                            unsigned VF) {
+    return 0;  // Constant overhead of 0.
+  }
+
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys) {  // Callee, return type and argument types are ignored.
+    return 1;  // Constant cost of 1 for every call.
+  }
+
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,  // Overload taking argument values plus a VF.
+            ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
+    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);  // Pure pass-through to the base implementation.
+  }
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,  // Overload taking argument types; unlike the Value overload it does not call BaseT.
+            ArrayRef<Type*> Tys, FastMathFlags FMF,
+            unsigned ScalarizationCostPassed = UINT_MAX) {  // The default argument is never read in the body.
+    return 1;  // Constant cost of 1 for every intrinsic.
+  }
+
+  bool hasBranchDivergence() {  // Unconditionally answers false.
+    return false;
+  }
+
+  bool enableAggressiveInterleaving(bool LoopHasReductions) {  // LoopHasReductions is ignored.
+    return false;  // Never enabled.
+  }
+
+  unsigned getCFInstrCost(unsigned Opcode) {  // Opcode is ignored.
+    return 1;  // Constant cost of 1.
+  }
+
+  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,  // The ScalarEvolution and SCEV parameters are unnamed and unused.
+                                     const SCEV *) {
+    return 0;  // Constant cost of 0.
+  }
+
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,  // Defined out of line in HexagonTargetTransformInfo.cpp.
+            unsigned AddressSpace, const Instruction *I = nullptr);  // I defaults to nullptr.
+
+  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,  // All parameters are ignored.
+                                 unsigned AddressSpace) {
+    return 1;  // Constant cost of 1.
+  }
+
+  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,  // All parameters are ignored.
+                          Type *SubTp) {
+    return 1;  // Constant cost of 1 for every shuffle kind.
+  }
+
+  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,  // All parameters are ignored.
+                                  bool VariableMask,
+                                  unsigned Alignment) {
+    return 1;  // Constant cost of 1.
+  }
+
+  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,  // Adds no target-specific logic.
+                                      unsigned Factor,
+                                      ArrayRef<unsigned> Indices,
+                                      unsigned Alignment,
+                                      unsigned AddressSpace) {
+    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,  // Pure pass-through; arguments forwarded unchanged.
+                                             Alignment, AddressSpace);
+  }
+
+  unsigned getNumberOfParts(Type *Tp) {  // Adds no target-specific logic.
+    return BaseT::getNumberOfParts(Tp);  // Pure pass-through to the base implementation.
+  }
+
+  bool prefersVectorizedAddressing() {  // Unconditionally answers true.
+    return true;
+  }
+
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,  // Adds no target-specific logic.
+                              const Instruction *I) {  // Unlike the other Instruction* parameters in this hunk, I has no default here.
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);  // Pure pass-through to the base implementation.
+  }
+
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,  // Adds no target-specific logic.
+            TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+            TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+            TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+            TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+            ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {  // Args defaults to an empty ArrayRef.
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,  // Pure pass-through; arguments forwarded unchanged.
+                                         Opd1PropInfo, Opd2PropInfo, Args);
+  }
+
+  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,  // All parameters are ignored.
+                            const Instruction *I = nullptr) {
+    return 1;  // Constant cost of 1 for every cast.
+  }
+
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {  // All parameters are ignored.
+    return 1;  // Constant cost of 1.
+  }
  
    /// @}
  
@@ -80,5 +187,4 @@ public:
  };
  
  } // end namespace llvm
-
  #endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H
author	Krzysztof Parzyszek <kparzysz@codeaurora.org>
	Tue, 27 Mar 2018 17:07:52 +0000 (17:07 +0000)
committer	Krzysztof Parzyszek <kparzysz@codeaurora.org>
	Tue, 27 Mar 2018 17:07:52 +0000 (17:07 +0000)
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h		patch \| blob \| history