/// Checks if the given value is actually an undefined constant vector.
/// Also, if the\p ShuffleMask is not empty, tries to check if the non-masked
/// elements actually mask the insertelement buildvector, if any.
-static bool isUndefVector(const Value *V, ArrayRef<int> ShuffleMask = None) {
- if (isa<UndefValue>(V))
- return true;
+template <bool IsPoisonOnly = false>
+static SmallBitVector isUndefVector(const Value *V,
+ ArrayRef<int> ShuffleMask = None) {
+ SmallBitVector Res(ShuffleMask.empty() ? 1 : ShuffleMask.size(), true);
+ using T =
+ typename std::conditional<IsPoisonOnly, PoisonValue, UndefValue>::type;
+ if (isa<T>(V))
+ return Res;
auto *VecTy = dyn_cast<FixedVectorType>(V->getType());
if (!VecTy)
- return false;
+ return Res.reset();
auto *C = dyn_cast<Constant>(V);
if (!C) {
if (!ShuffleMask.empty()) {
const Value *Base = V;
while (auto *II = dyn_cast<InsertElementInst>(Base)) {
+ if (isa<T>(II->getOperand(1)))
+ continue;
Base = II->getOperand(0);
Optional<unsigned> Idx = getInsertIndex(II);
if (!Idx)
continue;
if (*Idx < ShuffleMask.size() && ShuffleMask[*Idx] == UndefMaskElem)
- return false;
+ Res.reset(*Idx);
}
- return V != Base && isUndefVector(Base);
+ // TODO: Add analysis for shuffles here too.
+ if (V == Base) {
+ Res.reset();
+ } else {
+ SmallVector<int> SubMask(ShuffleMask.size(), UndefMaskElem);
+ Res &= isUndefVector<IsPoisonOnly>(Base, SubMask);
+ }
+ } else {
+ Res.reset();
}
- return false;
+ return Res;
}
for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
if (Constant *Elem = C->getAggregateElement(I))
- if (!isa<UndefValue>(Elem) &&
+ if (!isa<T>(Elem) &&
(ShuffleMask.empty() ||
(I < ShuffleMask.size() && ShuffleMask[I] == UndefMaskElem)))
- return false;
+ Res.reset(I);
}
- return true;
+ return Res;
}
/// Checks if the vector of instructions can be represented as a shuffle, like:
return None;
auto *Vec = EI->getVectorOperand();
// We can extractelement from undef or poison vector.
- if (isUndefVector(Vec))
+ if (isUndefVector(Vec).all())
continue;
// All vector operands must have the same number of vector elements.
if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
// Undefs are always profitable for extractelements.
if (!Ex2Idx)
return LookAheadHeuristics::ScoreConsecutiveExtracts;
- if (isUndefVector(EV2) && EV2->getType() == EV1->getType())
+ if (isUndefVector(EV2).all() && EV2->getType() == EV1->getType())
return LookAheadHeuristics::ScoreConsecutiveExtracts;
if (EV2 == EV1) {
int Idx1 = Ex1Idx->getZExtValue();
if (isa<ExtractElementInst, UndefValue>(V))
Key = hash_value(Value::UndefValueVal + 1);
if (auto *EI = dyn_cast<ExtractElementInst>(V)) {
- if (!isUndefVector(EI->getVectorOperand()) &&
+ if (!isUndefVector(EI->getVectorOperand()).all() &&
!isa<UndefValue>(EI->getIndexOperand()))
SubKey = hash_value(EI->getVectorOperand());
}
// initial vector or inserting a subvector.
// TODO: Implement the analysis of the FirstInsert->getOperand(0)
// subvector of ActualVecTy.
- if (!isUndefVector(FirstInsert->getOperand(0), InsertMask) &&
- NumScalars != NumElts && !IsWholeSubvector) {
+ SmallBitVector InMask =
+ isUndefVector(FirstInsert->getOperand(0), InsertMask);
+ if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) {
if (InsertVecSz != VecSz) {
auto *ActualVecTy =
FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
CostKind, OffsetBeg - Offset, InsertVecTy);
} else {
for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I)
- Mask[I] = I;
+ Mask[I] = InMask.test(I) ? UndefMaskElem : I;
for (unsigned I = OffsetBeg - Offset, End = OffsetEnd - Offset;
I <= End; ++I)
if (Mask[I] != UndefMaskElem)
Mask[I] = I + VecSz;
for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I)
- Mask[I] = I;
+ Mask[I] = InMask.test(I) ? UndefMaskElem : I;
Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
}
}
SmallVector<int> Mask(ShuffleMask.begin()->second);
auto VMIt = std::next(ShuffleMask.begin());
T *Prev = nullptr;
- bool IsBaseNotUndef = !isUndefVector(Base, Mask);
- if (IsBaseNotUndef) {
+ SmallBitVector IsBaseUndef = isUndefVector(Base, Mask);
+ if (!IsBaseUndef.all()) {
// Base is not undef, need to combine it with the next subvectors.
std::pair<T *, bool> Res =
ResizeAction(ShuffleMask.begin()->first, Mask, /*ForSingleMask=*/false);
+ SmallBitVector IsBasePoison = isUndefVector<true>(Base, Mask);
for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
if (Mask[Idx] == UndefMaskElem)
- Mask[Idx] = Idx;
+ Mask[Idx] = IsBasePoison.test(Idx) ? UndefMaskElem : Idx;
else
Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
}
}
VMIt = std::next(VMIt);
}
+ bool IsBaseNotUndef = !IsBaseUndef.all();
// Perform requested actions for the remaining masks/vectors.
for (auto E = ShuffleMask.end(); VMIt != E; ++VMIt) {
// Shuffle other input vectors, if any.
if (Mask[I] != UndefMaskElem)
InsertMask[Offset + I] = I;
}
- bool IsFirstUndef = isUndefVector(FirstInsert->getOperand(0), InsertMask);
- if ((!IsIdentity || Offset != 0 || !IsFirstUndef) &&
+ SmallBitVector IsFirstUndef =
+ isUndefVector(FirstInsert->getOperand(0), InsertMask);
+ if ((!IsIdentity || Offset != 0 || !IsFirstUndef.all()) &&
NumElts != NumScalars) {
- if (IsFirstUndef) {
+ if (IsFirstUndef.all()) {
if (!ShuffleVectorInst::isIdentityMask(InsertMask)) {
+ SmallBitVector IsFirstPoison =
+ isUndefVector<true>(FirstInsert->getOperand(0), InsertMask);
+ if (!IsFirstPoison.all()) {
+ for (unsigned I = 0; I < NumElts; I++) {
+ if (InsertMask[I] == UndefMaskElem && !IsFirstPoison.test(I))
+ InsertMask[I] = I + NumElts;
+ }
+ }
V = Builder.CreateShuffleVector(
- V, InsertMask, cast<Instruction>(E->Scalars.back())->getName());
+ V,
+ IsFirstPoison.all() ? PoisonValue::get(V->getType())
+ : FirstInsert->getOperand(0),
+ InsertMask, cast<Instruction>(E->Scalars.back())->getName());
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
- // Create freeze for undef values.
- if (!isa<PoisonValue>(FirstInsert->getOperand(0)))
- V = Builder.CreateFreeze(V);
}
} else {
+ SmallBitVector IsFirstPoison =
+ isUndefVector<true>(FirstInsert->getOperand(0), InsertMask);
for (unsigned I = 0; I < NumElts; I++) {
if (InsertMask[I] == UndefMaskElem)
- InsertMask[I] = I;
+ InsertMask[I] = IsFirstPoison.test(I) ? UndefMaskElem : I;
else
InsertMask[I] += NumElts;
}
if (IsIdentityMask(Mask, cast<FixedVectorType>(SV->getType())) ||
SV->isZeroEltSplat())
break;
- bool IsOp1Undef = isUndefVector(SV->getOperand(0), Mask);
- bool IsOp2Undef = isUndefVector(SV->getOperand(1), Mask);
+ bool IsOp1Undef = isUndefVector(SV->getOperand(0), Mask).all();
+ bool IsOp2Undef = isUndefVector(SV->getOperand(1), Mask).all();
if (!IsOp1Undef && !IsOp2Undef)
break;
SmallVector<int> ShuffleMask(SV->getShuffleMask().begin(),
&CombineMasks](Value *V1, Value *V2,
ArrayRef<int> Mask) -> Value * {
assert(V1 && "Expected at least one vector value.");
- if (V2 && !isUndefVector(V2, Mask)) {
+ if (V2 && !isUndefVector(V2, Mask).all()) {
// Peek through shuffles.
Value *Op1 = V1;
Value *Op2 = V2;
if (SV1->getOperand(0)->getType() ==
SV2->getOperand(0)->getType() &&
SV1->getOperand(0)->getType() != SV1->getType() &&
- isUndefVector(SV1->getOperand(1), CombinedMask1) &&
- isUndefVector(SV2->getOperand(1), CombinedMask2)) {
+ isUndefVector(SV1->getOperand(1), CombinedMask1).all() &&
+ isUndefVector(SV2->getOperand(1), CombinedMask2).all()) {
Op1 = SV1->getOperand(0);
Op2 = SV2->getOperand(0);
SmallVector<int> ShuffleMask1(SV1->getShuffleMask().begin(),
; CHECK-NEXT: [[LD4:%.*]] = load float, float* [[GEP4]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[LD4]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[LD1]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[LD1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP7:%.*]] = freeze <8 x i32> [[VECINS_I_5_I1]]
+; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
; CHECK-NEXT: ret void
;
entry:
; CHECK-LABEL: @b(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer)
; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], <float undef, float undef, float undef, float 2.000000e+00>
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
-; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x float> [[RD1]]
-; CHECK-NEXT: ret <4 x float> [[TMP19]]
+; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> undef, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+; CHECK-NEXT: ret <4 x float> [[RD1]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i32> [[SHUFFLE]], <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SHR_4_I_I]], i32 5
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_5_I_I]], i32 6
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_6_I_I]], i32 7
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> <i64 32, i64 24>
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> [[TMP8]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> [[TMP8]], <4 x i32> <i32 4, i32 5, i32 undef, i32 3>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr> [[TMP9]], ptr [[ARG_1]], i32 2
; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP10]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
; CHECK-NEXT: br label [[JOIN]]
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[ARRAYIDX2]] to <8 x i32>*
; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2
; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>