From b03f3fbd6a6b8843469865b16c9eb3af8adc2d3a Mon Sep 17 00:00:00 2001
From: Christopher Tetreault <ctetreau@quicinc.com>
Date: Mon, 3 Feb 2020 12:46:42 -0800
Subject: [PATCH] Reapply: [SVE] Fix bug in simplification of scalable vector
 instructions

This reverts commit a05441038a3a4a011b9421751367c5c797d57137, reapplying
commit 31574d38ac5fa4646cf01dd252a23e682402134f
---
 llvm/include/llvm/IR/Instructions.h                |  8 +--
 llvm/lib/Analysis/InstructionSimplify.cpp          | 83 +++++++++++++---------
 llvm/lib/Analysis/ValueTracking.cpp                |  5 ++
 llvm/lib/AsmParser/LLParser.cpp                    |  8 +--
 llvm/lib/IR/ConstantFold.cpp                       |  5 +-
 llvm/lib/IR/Instructions.cpp                       |  4 ++
 .../ConstantFolding/vscale-getelementptr.ll        | 32 +++++++++
 .../ConstantFolding/vscale-shufflevector.ll        | 41 +++++++++++
 8 files changed, 144 insertions(+), 42 deletions(-)
 create mode 100644 llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll
 create mode 100644 llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index c3d4dc9..e558d43 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1060,13 +1060,13 @@ public:
                                    Ptr->getType()->getPointerAddressSpace());
     // Vector GEP
     if (Ptr->getType()->isVectorTy()) {
-      unsigned NumElem = Ptr->getType()->getVectorNumElements();
-      return VectorType::get(PtrTy, NumElem);
+      ElementCount EltCount = Ptr->getType()->getVectorElementCount();
+      return VectorType::get(PtrTy, EltCount);
     }
     for (Value *Index : IdxList)
       if (Index->getType()->isVectorTy()) {
-        unsigned NumElem = Index->getType()->getVectorNumElements();
-        return VectorType::get(PtrTy, NumElem);
+        ElementCount EltCount = Index->getType()->getVectorElementCount();
+        return VectorType::get(PtrTy, EltCount);
       }
     // Scalar GEP
     return PtrTy;
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 98f64b9..bc1b008 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4074,9 +4074,9 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
   Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
   Type *GEPTy = PointerType::get(LastType, AS);
   if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
-    GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+    GEPTy = VectorType::get(GEPTy, VT->getElementCount());
   else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType()))
-    GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+    GEPTy = VectorType::get(GEPTy, VT->getElementCount());
 
   if (isa<UndefValue>(Ops[0]))
     return UndefValue::get(GEPTy);
@@ -4445,52 +4445,66 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
     return UndefValue::get(RetTy);
 
   Type *InVecTy = Op0->getType();
-  unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
-  unsigned InVecNumElts = InVecTy->getVectorNumElements();
+  ElementCount MaskEltCount = Mask->getType()->getVectorElementCount();
+  ElementCount InVecEltCount = InVecTy->getVectorElementCount();
+
+  assert(MaskEltCount.Scalable == InVecEltCount.Scalable &&
+         "vscale mismatch between input vector and mask");
+
+  bool Scalable = MaskEltCount.Scalable;
 
   SmallVector<int, 32> Indices;
-  ShuffleVectorInst::getShuffleMask(Mask, Indices);
-  assert(MaskNumElts == Indices.size() &&
-         "Size of Indices not same as number of mask elements?");
-
-  // Canonicalization: If mask does not select elements from an input vector,
-  // replace that input vector with undef.
-  bool MaskSelects0 = false, MaskSelects1 = false;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (Indices[i] == -1)
-      continue;
-    if ((unsigned)Indices[i] < InVecNumElts)
-      MaskSelects0 = true;
-    else
-      MaskSelects1 = true;
+  if (!Scalable) {
+    ShuffleVectorInst::getShuffleMask(Mask, Indices);
+    assert(MaskEltCount.Min == Indices.size() &&
+           "Size of Indices not same as number of mask elements?");
+  }
+
+  if (!Scalable) {
+    // Canonicalization: If mask does not select elements from an input vector,
+    // replace that input vector with undef.
+    bool MaskSelects0 = false, MaskSelects1 = false;
+    for (unsigned i = 0; i != MaskEltCount.Min; ++i) {
+      if (Indices[i] == -1)
+        continue;
+      if ((unsigned)Indices[i] < InVecEltCount.Min)
+        MaskSelects0 = true;
+      else
+        MaskSelects1 = true;
+    }
+    if (!MaskSelects0)
+      Op0 = UndefValue::get(InVecTy);
+    if (!MaskSelects1)
+      Op1 = UndefValue::get(InVecTy);
   }
-  if (!MaskSelects0)
-    Op0 = UndefValue::get(InVecTy);
-  if (!MaskSelects1)
-    Op1 = UndefValue::get(InVecTy);
 
   auto *Op0Const = dyn_cast<Constant>(Op0);
   auto *Op1Const = dyn_cast<Constant>(Op1);
 
-  // If all operands are constant, constant fold the shuffle.
-  if (Op0Const && Op1Const)
+  // If all operands are constant, constant fold the shuffle. This
+  // transformation depends on the value of the mask which is not known at
+  // compile time for scalable vectors
+  if (!Scalable && Op0Const && Op1Const)
     return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);
 
   // Canonicalization: if only one input vector is constant, it shall be the
-  // second one.
-  if (Op0Const && !Op1Const) {
+  // second one. This transformation depends on the value of the mask which
+  // is not known at compile time for scalable vectors
+  if (!Scalable && Op0Const && !Op1Const) {
     std::swap(Op0, Op1);
-    ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts);
+    ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min);
   }
 
   // A splat of an inserted scalar constant becomes a vector constant:
   // shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...>
   // NOTE: We may have commuted above, so analyze the updated Indices, not the
   //       original mask constant.
+  // NOTE: This transformation depends on the value of the mask which is not
+  // known at compile time for scalable vectors
   Constant *C;
   ConstantInt *IndexC;
-  if (match(Op0, m_InsertElement(m_Value(), m_Constant(C),
-                                 m_ConstantInt(IndexC)))) {
+  if (!Scalable && match(Op0, m_InsertElement(m_Value(), m_Constant(C),
+                                              m_ConstantInt(IndexC)))) {
     // Match a splat shuffle mask of the insert index allowing undef elements.
     int InsertIndex = IndexC->getZExtValue();
     if (all_of(Indices, [InsertIndex](int MaskElt) {
@@ -4499,8 +4513,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
       assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat");
 
       // Shuffle mask undefs become undefined constant result elements.
-      SmallVector<Constant *, 16> VecC(MaskNumElts, C);
-      for (unsigned i = 0; i != MaskNumElts; ++i)
+      SmallVector<Constant *, 16> VecC(MaskEltCount.Min, C);
+      for (unsigned i = 0; i != MaskEltCount.Min; ++i)
         if (Indices[i] == -1)
           VecC[i] = UndefValue::get(C->getType());
       return ConstantVector::get(VecC);
@@ -4514,6 +4528,11 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
         OpShuf->getMask()->getSplatValue())
       return Op0;
 
+  // All remaining transformation depend on the value of the mask, which is
+  // not known at compile time for scalable vectors.
+  if (Scalable)
+    return nullptr;
+
   // Don't fold a shuffle with undef mask elements. This may get folded in a
   // better way using demanded bits or other analysis.
   // TODO: Should we allow this?
@@ -4525,7 +4544,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
   // shuffle. This handles simple identity shuffles as well as chains of
   // shuffles that may widen/narrow and/or move elements across lanes and back.
   Value *RootVec = nullptr;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
+  for (unsigned i = 0; i != MaskEltCount.Min; ++i) {
     // Note that recursion is limited for each vector element, so if any element
     // exceeds the limit, this will fail to simplify.
     RootVec =
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5b4c810..1761cb6 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -166,6 +166,11 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
 static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
                                    const APInt &DemandedElts,
                                    APInt &DemandedLHS, APInt &DemandedRHS) {
+  // The length of scalable vectors is unknown at compile time, thus we
+  // cannot check their values
+  if (Shuf->getMask()->getType()->getVectorElementCount().Scalable)
+    return false;
+
   int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
   int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
   DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 1fc965a..b22e7cb 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -7224,8 +7224,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
   bool AteExtraComma = false;
   // GEP returns a vector of pointers if at least one of parameters is a vector.
   // All vector parameters should have the same vector width.
-  unsigned GEPWidth = BaseType->isVectorTy() ?
-    BaseType->getVectorNumElements() : 0;
+  ElementCount GEPWidth = BaseType->isVectorTy() ?
+    BaseType->getVectorElementCount() : ElementCount(0, false);
 
   while (EatIfPresent(lltok::comma)) {
     if (Lex.getKind() == lltok::MetadataVar) {
@@ -7237,8 +7237,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
       return Error(EltLoc, "getelementptr index must be an integer");
 
     if (Val->getType()->isVectorTy()) {
-      unsigned ValNumEl = Val->getType()->getVectorNumElements();
-      if (GEPWidth && GEPWidth != ValNumEl)
+      ElementCount ValNumEl = Val->getType()->getVectorElementCount();
+      if (GEPWidth != ElementCount(0, false) && GEPWidth != ValNumEl)
         return Error(EltLoc,
           "getelementptr vector index has a wrong number of elements");
       GEPWidth = ValNumEl;
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 9fd7dbd..cd037f0b 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -863,12 +863,12 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
 Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
                                                      Constant *V2,
                                                      Constant *Mask) {
-  unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+  ElementCount MaskEltCount = Mask->getType()->getVectorElementCount();
   Type *EltTy = V1->getType()->getVectorElementType();
 
   // Undefined shuffle mask -> undefined value.
   if (isa<UndefValue>(Mask))
-    return UndefValue::get(VectorType::get(EltTy, MaskNumElts));
+    return UndefValue::get(VectorType::get(EltTy, MaskEltCount));
 
   // Don't break the bitcode reader hack.
   if (isa<ConstantExpr>(Mask)) return nullptr;
@@ -879,6 +879,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
   if (ValTy->isScalable())
     return nullptr;
 
+  unsigned MaskNumElts = MaskEltCount.Min;
   unsigned SrcNumElts = V1->getType()->getVectorNumElements();
 
   // Loop over the shuffle mask, evaluating each element.
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index e93f4b3..3886494 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1887,6 +1887,8 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
 
 int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {
   assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
+  assert(!Mask->getType()->getVectorElementCount().Scalable &&
+    "Length of scalable vectors unknown at compile time");
   if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask))
     return CDS->getElementAsInteger(i);
   Constant *C = Mask->getAggregateElement(i);
@@ -1897,6 +1899,8 @@ int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {
 
 void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
                                        SmallVectorImpl<int> &Result) {
+  assert(!Mask->getType()->getVectorElementCount().Scalable &&
+    "Length of scalable vectors unknown at compile time");
   unsigned NumElts = Mask->getType()->getVectorNumElements();
 
   if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
diff --git a/llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll b/llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll
new file mode 100644
index 0000000..8e90961
--- /dev/null
+++ b/llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll
@@ -0,0 +1,32 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; CHECK-LABEL: define <4 x i32*> @fixed_length_version_first() {
+; CHECK-NEXT:  ret <4 x i32*> undef
+define <4 x i32*> @fixed_length_version_first() {
+  %ptr = getelementptr i32, <4 x i32*> undef, <4 x i64> undef
+  ret <4 x i32*> %ptr
+}
+
+; CHECK-LABEL: define <4 x <4 x i32>*> @fixed_length_version_second() {
+; CHECK-NEXT:  ret <4 x <4 x i32>*> undef
+define <4 x <4 x i32>*> @fixed_length_version_second() {
+  %ptr = getelementptr <4 x i32>, <4 x i32>* undef, <4 x i64> undef
+  ret <4 x <4 x i32>*> %ptr
+}
+
+; CHECK-LABEL: define <vscale x 4 x i32*> @vscale_version_first() {
+; CHECK-NEXT:  ret <vscale x 4 x i32*> undef
+define <vscale x 4 x i32*> @vscale_version_first() {
+  %ptr = getelementptr i32, <vscale x 4 x i32*> undef, <vscale x 4 x i64> undef
+  ret <vscale x 4 x i32*> %ptr
+}
+
+; CHECK-LABEL: define <vscale x 4 x <vscale x 4 x i32>*> @vscale_version_second() {
+; CHECK-NEXT:  ret <vscale x 4 x <vscale x 4 x i32>*> undef
+define <vscale x 4 x <vscale x 4 x i32>*> @vscale_version_second() {
+  %ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* undef, <vscale x 4 x i64> undef
+  ret <vscale x 4 x <vscale x 4 x i32>*> %ptr
+}
diff --git a/llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll b/llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll
new file mode 100644
index 0000000..dc3b66e
--- /dev/null
+++ b/llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll
@@ -0,0 +1,41 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; This test checks that SimplifyInstruction does not blow up in the face of
+; a scalable shufflevector. vscale is a constant value known only at runtime.
+; Therefore, it is not possible to know the concrete value of, or the length
+; of the mask at compile time. Simplifications that depend on the value
+; of the mask cannot be performed.
+
+; Given the fact that the value of the mask is unknown at compile time for
+; scalable vectors, very few simplifications will be done. Here, we want to
+; see that the instruction can be passed to SimplifyInstruction and not crash
+; the compiler. It happens to be the case that this will be the result.
+
+; CHECK-LABEL: define <vscale x 8 x i1> @vscale_version()
+; CHECK-NEXT: %splatter = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+; CHECK-NEXT: %foo = shufflevector <vscale x 8 x i1> %splatter, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 8 x i1> %foo
+
+define <vscale x 8 x i1> @vscale_version() {
+  %splatter = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+  %foo = shufflevector <vscale x 8 x i1> %splatter,
+                       <vscale x 8 x i1> undef,
+                       <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i1> %foo
+}
+
+; The non-scalable version should be optimized as normal.
+
+; CHECK-LABEL: define <8 x i1> @fixed_length_version() {
+; CHECK-NEXT:  ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+define <8 x i1> @fixed_length_version() {
+  %splatter = insertelement <8 x i1> undef, i1 true, i32 0
+  %foo = shufflevector <8 x i1> %splatter,
+                       <8 x i1> undef,
+                       <8 x i32> zeroinitializer
+  ret <8 x i1> %foo
+}
+
-- 
2.7.4