From b03f3fbd6a6b8843469865b16c9eb3af8adc2d3a Mon Sep 17 00:00:00 2001 From: Christopher Tetreault Date: Mon, 3 Feb 2020 12:46:42 -0800 Subject: [PATCH] Reapply: [SVE] Fix bug in simplification of scalable vector instructions This reverts commit a05441038a3a4a011b9421751367c5c797d57137, reapplying commit 31574d38ac5fa4646cf01dd252a23e682402134f --- llvm/include/llvm/IR/Instructions.h | 8 +-- llvm/lib/Analysis/InstructionSimplify.cpp | 83 +++++++++++++--------- llvm/lib/Analysis/ValueTracking.cpp | 5 ++ llvm/lib/AsmParser/LLParser.cpp | 8 +-- llvm/lib/IR/ConstantFold.cpp | 5 +- llvm/lib/IR/Instructions.cpp | 4 ++ .../ConstantFolding/vscale-getelementptr.ll | 32 +++++++++ .../ConstantFolding/vscale-shufflevector.ll | 41 +++++++++++ 8 files changed, 144 insertions(+), 42 deletions(-) create mode 100644 llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll create mode 100644 llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index c3d4dc9..e558d43 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1060,13 +1060,13 @@ public: Ptr->getType()->getPointerAddressSpace()); // Vector GEP if (Ptr->getType()->isVectorTy()) { - unsigned NumElem = Ptr->getType()->getVectorNumElements(); - return VectorType::get(PtrTy, NumElem); + ElementCount EltCount = Ptr->getType()->getVectorElementCount(); + return VectorType::get(PtrTy, EltCount); } for (Value *Index : IdxList) if (Index->getType()->isVectorTy()) { - unsigned NumElem = Index->getType()->getVectorNumElements(); - return VectorType::get(PtrTy, NumElem); + ElementCount EltCount = Index->getType()->getVectorElementCount(); + return VectorType::get(PtrTy, EltCount); } // Scalar GEP return PtrTy; diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 98f64b9..bc1b008 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4074,9 +4074,9 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1)); Type *GEPTy = PointerType::get(LastType, AS); if (VectorType *VT = dyn_cast(Ops[0]->getType())) - GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + GEPTy = VectorType::get(GEPTy, VT->getElementCount()); else if (VectorType *VT = dyn_cast(Ops[1]->getType())) - GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + GEPTy = VectorType::get(GEPTy, VT->getElementCount()); if (isa(Ops[0])) return UndefValue::get(GEPTy); @@ -4445,52 +4445,66 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, return UndefValue::get(RetTy); Type *InVecTy = Op0->getType(); - unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); - unsigned InVecNumElts = InVecTy->getVectorNumElements(); + ElementCount MaskEltCount = Mask->getType()->getVectorElementCount(); + ElementCount InVecEltCount = InVecTy->getVectorElementCount(); + + assert(MaskEltCount.Scalable == InVecEltCount.Scalable && + "vscale mismatch between input vector and mask"); + + bool Scalable = MaskEltCount.Scalable; SmallVector Indices; - ShuffleVectorInst::getShuffleMask(Mask, Indices); - assert(MaskNumElts == Indices.size() && - "Size of Indices not same as number of mask elements?"); - - // Canonicalization: If mask does not select elements from an input vector, - // replace that input vector with undef. - bool MaskSelects0 = false, MaskSelects1 = false; - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Indices[i] == -1) - continue; - if ((unsigned)Indices[i] < InVecNumElts) - MaskSelects0 = true; - else - MaskSelects1 = true; + if (!Scalable) { + ShuffleVectorInst::getShuffleMask(Mask, Indices); + assert(MaskEltCount.Min == Indices.size() && + "Size of Indices not same as number of mask elements?"); + } + + if (!Scalable) { + // Canonicalization: If mask does not select elements from an input vector, + // replace that input vector with undef. + bool MaskSelects0 = false, MaskSelects1 = false; + for (unsigned i = 0; i != MaskEltCount.Min; ++i) { + if (Indices[i] == -1) + continue; + if ((unsigned)Indices[i] < InVecEltCount.Min) + MaskSelects0 = true; + else + MaskSelects1 = true; + } + if (!MaskSelects0) + Op0 = UndefValue::get(InVecTy); + if (!MaskSelects1) + Op1 = UndefValue::get(InVecTy); } - if (!MaskSelects0) - Op0 = UndefValue::get(InVecTy); - if (!MaskSelects1) - Op1 = UndefValue::get(InVecTy); auto *Op0Const = dyn_cast(Op0); auto *Op1Const = dyn_cast(Op1); - // If all operands are constant, constant fold the shuffle. - if (Op0Const && Op1Const) + // If all operands are constant, constant fold the shuffle. This + // transformation depends on the value of the mask which is not known at + // compile time for scalable vectors + if (!Scalable && Op0Const && Op1Const) return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); // Canonicalization: if only one input vector is constant, it shall be the - // second one. - if (Op0Const && !Op1Const) { + // second one. This transformation depends on the value of the mask which + // is not known at compile time for scalable vectors + if (!Scalable && Op0Const && !Op1Const) { std::swap(Op0, Op1); - ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts); + ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min); } // A splat of an inserted scalar constant becomes a vector constant: // shuf (inselt ?, C, IndexC), undef, --> // NOTE: We may have commuted above, so analyze the updated Indices, not the // original mask constant. + // NOTE: This transformation depends on the value of the mask which is not + // known at compile time for scalable vectors Constant *C; ConstantInt *IndexC; - if (match(Op0, m_InsertElement(m_Value(), m_Constant(C), - m_ConstantInt(IndexC)))) { + if (!Scalable && match(Op0, m_InsertElement(m_Value(), m_Constant(C), + m_ConstantInt(IndexC)))) { // Match a splat shuffle mask of the insert index allowing undef elements. int InsertIndex = IndexC->getZExtValue(); if (all_of(Indices, [InsertIndex](int MaskElt) { @@ -4499,8 +4513,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, assert(isa(Op1) && "Expected undef operand 1 for splat"); // Shuffle mask undefs become undefined constant result elements. - SmallVector VecC(MaskNumElts, C); - for (unsigned i = 0; i != MaskNumElts; ++i) + SmallVector VecC(MaskEltCount.Min, C); + for (unsigned i = 0; i != MaskEltCount.Min; ++i) if (Indices[i] == -1) VecC[i] = UndefValue::get(C->getType()); return ConstantVector::get(VecC); @@ -4514,6 +4528,11 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, OpShuf->getMask()->getSplatValue()) return Op0; + // All remaining transformation depend on the value of the mask, which is + // not known at compile time for scalable vectors. + if (Scalable) + return nullptr; + // Don't fold a shuffle with undef mask elements. This may get folded in a // better way using demanded bits or other analysis. // TODO: Should we allow this? @@ -4525,7 +4544,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, // shuffle. This handles simple identity shuffles as well as chains of // shuffles that may widen/narrow and/or move elements across lanes and back. Value *RootVec = nullptr; - for (unsigned i = 0; i != MaskNumElts; ++i) { + for (unsigned i = 0; i != MaskEltCount.Min; ++i) { // Note that recursion is limited for each vector element, so if any element // exceeds the limit, this will fail to simplify. RootVec = diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5b4c810..1761cb6 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -166,6 +166,11 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS) { + // The length of scalable vectors is unknown at compile time, thus we + // cannot check their values + if (Shuf->getMask()->getType()->getVectorElementCount().Scalable) + return false; + int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 1fc965a..b22e7cb 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7224,8 +7224,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; // GEP returns a vector of pointers if at least one of parameters is a vector. // All vector parameters should have the same vector width. - unsigned GEPWidth = BaseType->isVectorTy() ? - BaseType->getVectorNumElements() : 0; + ElementCount GEPWidth = BaseType->isVectorTy() ? + BaseType->getVectorElementCount() : ElementCount(0, false); while (EatIfPresent(lltok::comma)) { if (Lex.getKind() == lltok::MetadataVar) { @@ -7237,8 +7237,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { return Error(EltLoc, "getelementptr index must be an integer"); if (Val->getType()->isVectorTy()) { - unsigned ValNumEl = Val->getType()->getVectorNumElements(); - if (GEPWidth && GEPWidth != ValNumEl) + ElementCount ValNumEl = Val->getType()->getVectorElementCount(); + if (GEPWidth != ElementCount(0, false) && GEPWidth != ValNumEl) return Error(EltLoc, "getelementptr vector index has a wrong number of elements"); GEPWidth = ValNumEl; diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 9fd7dbd..cd037f0b 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -863,12 +863,12 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, Constant *Mask) { - unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); + ElementCount MaskEltCount = Mask->getType()->getVectorElementCount(); Type *EltTy = V1->getType()->getVectorElementType(); // Undefined shuffle mask -> undefined value. if (isa(Mask)) - return UndefValue::get(VectorType::get(EltTy, MaskNumElts)); + return UndefValue::get(VectorType::get(EltTy, MaskEltCount)); // Don't break the bitcode reader hack. if (isa(Mask)) return nullptr; @@ -879,6 +879,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, if (ValTy->isScalable()) return nullptr; + unsigned MaskNumElts = MaskEltCount.Min; unsigned SrcNumElts = V1->getType()->getVectorNumElements(); // Loop over the shuffle mask, evaluating each element. diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index e93f4b3..3886494 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1887,6 +1887,8 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) { assert(i < Mask->getType()->getVectorNumElements() && "Index out of range"); + assert(!Mask->getType()->getVectorElementCount().Scalable && + "Length of scalable vectors unknown at compile time"); if (auto *CDS = dyn_cast(Mask)) return CDS->getElementAsInteger(i); Constant *C = Mask->getAggregateElement(i); @@ -1897,6 +1899,8 @@ int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) { void ShuffleVectorInst::getShuffleMask(const Constant *Mask, SmallVectorImpl &Result) { + assert(!Mask->getType()->getVectorElementCount().Scalable && + "Length of scalable vectors unknown at compile time"); unsigned NumElts = Mask->getType()->getVectorNumElements(); if (auto *CDS = dyn_cast(Mask)) { diff --git a/llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll b/llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll new file mode 100644 index 0000000..8e90961 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll @@ -0,0 +1,32 @@ +; RUN: opt -early-cse -S < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +; CHECK-LABEL: define <4 x i32*> @fixed_length_version_first() { +; CHECK-NEXT: ret <4 x i32*> undef +define <4 x i32*> @fixed_length_version_first() { + %ptr = getelementptr i32, <4 x i32*> undef, <4 x i64> undef + ret <4 x i32*> %ptr +} + +; CHECK-LABEL: define <4 x <4 x i32>*> @fixed_length_version_second() { +; CHECK-NEXT: ret <4 x <4 x i32>*> undef +define <4 x <4 x i32>*> @fixed_length_version_second() { + %ptr = getelementptr <4 x i32>, <4 x i32>* undef, <4 x i64> undef + ret <4 x <4 x i32>*> %ptr +} + +; CHECK-LABEL: define @vscale_version_first() { +; CHECK-NEXT: ret undef +define @vscale_version_first() { + %ptr = getelementptr i32, undef, undef + ret %ptr +} + +; CHECK-LABEL: define *> @vscale_version_second() { +; CHECK-NEXT: ret *> undef +define *> @vscale_version_second() { + %ptr = getelementptr , * undef, undef + ret *> %ptr +} diff --git a/llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll b/llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll new file mode 100644 index 0000000..dc3b66e --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll @@ -0,0 +1,41 @@ +; RUN: opt -early-cse -S < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +; This test checks that SimplifyInstruction does not blow up in the face of +; a scalable shufflevector. vscale is a constant value known only at runtime. +; Therefore, it is not possible to know the concrete value of, or the length +; of the mask at compile time. Simplifications that depend on the value +; of the mask cannot be performed. + +; Given the fact that the value of the mask is unknown at compile time for +; scalable vectors, very few simplifications will be done. Here, we want to +; see that the instruction can be passed to SimplifyInstruction and not crash +; the compiler. It happens to be the case that this will be the result. + +; CHECK-LABEL: define @vscale_version() +; CHECK-NEXT: %splatter = insertelement undef, i1 true, i32 0 +; CHECK-NEXT: %foo = shufflevector %splatter, undef, zeroinitializer +; CHECK-NEXT: ret %foo + +define @vscale_version() { + %splatter = insertelement undef, i1 true, i32 0 + %foo = shufflevector %splatter, + undef, + zeroinitializer + ret %foo +} + +; The non-scalable version should be optimized as normal. + +; CHECK-LABEL: define <8 x i1> @fixed_length_version() { +; CHECK-NEXT: ret <8 x i1> +define <8 x i1> @fixed_length_version() { + %splatter = insertelement <8 x i1> undef, i1 true, i32 0 + %foo = shufflevector <8 x i1> %splatter, + <8 x i1> undef, + <8 x i32> zeroinitializer + ret <8 x i1> %foo +} + -- 2.7.4