return Cost;
}
+/// Shuffles \p Mask in accordance with the given \p SubMask.
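+/// E.g. (illustrative values only): for Mask = {3, 2, 1, 0} and
+/// SubMask = {0, 0, 2, UndefMaskElem} the result is {3, 3, 1, UndefMaskElem},
+/// i.e. \p SubMask is applied on top of the existing \p Mask.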
+static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
+ if (SubMask.empty())
+ return;
+ if (Mask.empty()) {
+ Mask.append(SubMask.begin(), SubMask.end());
+ return;
+ }
+ SmallVector<int, 4> NewMask(SubMask.size(), SubMask.size());
+ int TermValue = std::min(Mask.size(), SubMask.size());
+ for (int I = 0, E = SubMask.size(); I < E; ++I) {
+ if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
+ Mask[SubMask[I]] >= TermValue) {
+ NewMask[I] = UndefMaskElem;
+ continue;
+ }
+ NewMask[I] = Mask[SubMask[I]];
+ }
+ Mask.swap(NewMask);
+}
+
InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
ArrayRef<Value *> VectorizedVals) {
ArrayRef<Value*> VL = E->Scalars;
else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
ScalarTy = IE->getOperand(1)->getType();
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ auto *FinalVecTy = VecTy;
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
// If we have computed a smaller type for the expression, update VecTy so
unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
- InstructionCost ReuseShuffleCost = 0;
- if (NeedToShuffleReuses) {
- ReuseShuffleCost =
- TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy,
- E->ReuseShuffleIndices);
- }
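+  // If scalars are reused, the vectorized value ends up with
+  // ReuseShuffleNumbers elements rather than VL.size(); FinalVecTy describes
+  // the type after that reuse shuffle and is used for the shuffle costs below.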
+ if (NeedToShuffleReuses)
+ FinalVecTy =
+ FixedVectorType::get(VecTy->getElementType(), ReuseShuffleNumbers);
// FIXME: it tries to fix a problem with MSVC buildbots.
TargetTransformInfo &TTIRef = *TTI;
auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
dbgs()
<< "SLP: perfect diamond match for gather bundle that starts with "
<< *VL.front() << ".\n");
+ if (NeedToShuffleReuses)
+ GatherCost =
+ TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ FinalVecTy, E->ReuseShuffleIndices);
} else {
LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
<< " entries for bundle that starts with "
      // Detected that instead of a gather we can emit a shuffle of one or two
      // previously vectorized nodes. Add the cost of the permutation rather
      // than the cost of a gather.
- GatherCost = TTI->getShuffleCost(*Shuffle, VecTy, Mask);
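+      // Fold the reuse shuffle into the permutation mask so a single shuffle
+      // of FinalVecTy covers both the permutation and the reuse.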
+ ::addMask(Mask, E->ReuseShuffleIndices);
+ GatherCost = TTI->getShuffleCost(*Shuffle, FinalVecTy, Mask);
}
- return ReuseShuffleCost + GatherCost;
+ return GatherCost;
}
if (isSplat(VL)) {
    // Found a broadcast of a single scalar; calculate the cost as the
    // broadcast.
- return ReuseShuffleCost +
- TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, None,
- 0);
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
}
if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) &&
allSameBlock(VL) &&
InstructionCost Cost =
computeExtractCost(VL, VecTy, *ShuffleKind, Mask, *TTI);
AdjustExtractsCost(Cost, /*IsGather=*/true);
- return ReuseShuffleCost + Cost;
+ if (NeedToShuffleReuses)
+ Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ FinalVecTy, E->ReuseShuffleIndices);
+ return Cost;
}
}
+ InstructionCost ReuseShuffleCost = 0;
+ if (NeedToShuffleReuses)
+ ReuseShuffleCost = TTI->getShuffleCost(
+ TTI::SK_PermuteSingleSrc, FinalVecTy, E->ReuseShuffleIndices);
return ReuseShuffleCost + getGatherCost(VL);
}
+ InstructionCost CommonCost = 0;
+ SmallVector<int> Mask;
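+  // Build a single mask that combines the entry's reordering (if any) with
+  // the reuse shuffle, so the common permutation is costed at most once.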
+ if (!E->ReorderIndices.empty()) {
+ SmallVector<int> NewMask;
+ if (E->getOpcode() == Instruction::Store) {
+ // For stores the order is actually a mask.
+ NewMask.resize(E->ReorderIndices.size());
+ copy(E->ReorderIndices, NewMask.begin());
+ } else {
+ inversePermutation(E->ReorderIndices, NewMask);
+ }
+ ::addMask(Mask, NewMask);
+ }
+ if (NeedToShuffleReuses)
+ ::addMask(Mask, E->ReuseShuffleIndices);
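+  // An identity mask requires no actual shuffle, so it adds no common cost.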
+ if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask))
+ CommonCost =
+ TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
assert((E->State == TreeEntry::Vectorize ||
E->State == TreeEntry::ScatterVectorize) &&
"Unhandled state");
for (unsigned I : E->ReuseShuffleIndices) {
if (ShuffleOrOp == Instruction::ExtractElement) {
auto *EE = cast<ExtractElementInst>(VL[I]);
- ReuseShuffleCost -= TTI->getVectorInstrCost(
- Instruction::ExtractElement, EE->getVectorOperandType(),
- *getExtractIndex(EE));
+ CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
+ EE->getVectorOperandType(),
+ *getExtractIndex(EE));
} else {
- ReuseShuffleCost -= TTI->getVectorInstrCost(
- Instruction::ExtractElement, VecTy, Idx);
+ CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
+ VecTy, Idx);
++Idx;
}
}
for (Value *V : VL) {
if (ShuffleOrOp == Instruction::ExtractElement) {
auto *EE = cast<ExtractElementInst>(V);
- ReuseShuffleCost += TTI->getVectorInstrCost(
- Instruction::ExtractElement, EE->getVectorOperandType(),
- *getExtractIndex(EE));
+ CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement,
+ EE->getVectorOperandType(),
+ *getExtractIndex(EE));
} else {
--Idx;
- ReuseShuffleCost += TTI->getVectorInstrCost(
- Instruction::ExtractElement, VecTy, Idx);
+ CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement,
+ VecTy, Idx);
}
}
- CommonCost = ReuseShuffleCost;
- } else if (!E->ReorderIndices.empty()) {
- SmallVector<int> NewMask;
- inversePermutation(E->ReorderIndices, NewMask);
- CommonCost = TTI->getShuffleCost(
- TargetTransformInfo::SK_PermuteSingleSrc, VecTy, NewMask);
}
if (ShuffleOrOp == Instruction::ExtractValue) {
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
TTI::getCastContextHint(VL0), CostKind, VL0);
if (NeedToShuffleReuses) {
- ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
// Calculate the cost of this instruction.
InstructionCost VecCost = 0;
// Check if the values are candidates to demote.
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
- VecCost =
- ReuseShuffleCost +
- TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy,
- TTI::getCastContextHint(VL0), CostKind, VL0);
+ VecCost = CommonCost + TTI->getCastInstrCost(
+ E->getOpcode(), VecTy, SrcVecTy,
+ TTI::getCastContextHint(VL0), CostKind, VL0);
}
- LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
+ LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
return VecCost - ScalarCost;
}
case Instruction::FCmp:
TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
if (NeedToShuffleReuses) {
- ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
CmpInst::BAD_ICMP_PREDICATE, CostKind);
VecCost = std::min(VecCost, IntrinsicCost);
}
- LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
- return ReuseShuffleCost + VecCost - ScalarCost;
+ LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
+ return CommonCost + VecCost - ScalarCost;
}
case Instruction::FNeg:
case Instruction::Add:
TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
if (NeedToShuffleReuses) {
- ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost =
TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
- LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
- return ReuseShuffleCost + VecCost - ScalarCost;
+ LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
+ return CommonCost + VecCost - ScalarCost;
}
case Instruction::GetElementPtr: {
TargetTransformInfo::OperandValueKind Op1VK =
InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
if (NeedToShuffleReuses) {
- ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost = TTI->getArithmeticInstrCost(
Instruction::Add, VecTy, CostKind, Op1VK, Op2VK);
- LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
- return ReuseShuffleCost + VecCost - ScalarCost;
+ LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
+ return CommonCost + VecCost - ScalarCost;
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Load, ScalarTy, Alignment, 0, CostKind, VL0);
if (NeedToShuffleReuses) {
- ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecLdCost;
Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, Alignment, CostKind, VL0);
}
- if (!NeedToShuffleReuses && !E->ReorderIndices.empty()) {
- SmallVector<int> NewMask;
- inversePermutation(E->ReorderIndices, NewMask);
- VecLdCost += TTI->getShuffleCost(
- TargetTransformInfo::SK_PermuteSingleSrc, VecTy, NewMask);
- }
- LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecLdCost, ScalarLdCost));
- return ReuseShuffleCost + VecLdCost - ScalarLdCost;
+ LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecLdCost, ScalarLdCost));
+ return CommonCost + VecLdCost - ScalarLdCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecStCost = TTI->getMemoryOpCost(
Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
- if (IsReorder) {
- SmallVector<int> NewMask;
- inversePermutation(E->ReorderIndices, NewMask);
- VecStCost += TTI->getShuffleCost(
- TargetTransformInfo::SK_PermuteSingleSrc, VecTy, NewMask);
- }
- LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecStCost, ScalarStCost));
- return VecStCost - ScalarStCost;
+ LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecStCost, ScalarStCost));
+ return CommonCost + VecStCost - ScalarStCost;
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
InstructionCost ScalarEltCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
if (NeedToShuffleReuses) {
- ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
<< " for " << *CI << "\n");
- return ReuseShuffleCost + VecCallCost - ScalarCallCost;
+ return CommonCost + VecCallCost - ScalarCallCost;
}
case Instruction::ShuffleVector: {
assert(E->isAltShuffle() &&
if (NeedToShuffleReuses) {
for (unsigned Idx : E->ReuseShuffleIndices) {
Instruction *I = cast<Instruction>(VL[Idx]);
- ReuseShuffleCost -= TTI->getInstructionCost(I, CostKind);
+ CommonCost -= TTI->getInstructionCost(I, CostKind);
}
for (Value *V : VL) {
Instruction *I = cast<Instruction>(V);
- ReuseShuffleCost += TTI->getInstructionCost(I, CostKind);
+ CommonCost += TTI->getInstructionCost(I, CostKind);
}
}
for (Value *V : VL) {
}
VecCost +=
TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask, 0);
- LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
- return ReuseShuffleCost + VecCost - ScalarCost;
+ LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
+ return CommonCost + VecCost - ScalarCost;
}
default:
llvm_unreachable("Unknown instruction");
addMask(NewMask);
}
- void addMask(ArrayRef<int> SubMask) {
- if (SubMask.empty())
- return;
- if (Mask.empty()) {
- Mask.append(SubMask.begin(), SubMask.end());
- return;
- }
- SmallVector<int, 4> NewMask(SubMask.size(), SubMask.size());
- int TermValue = std::min(Mask.size(), SubMask.size());
- for (int I = 0, E = SubMask.size(); I < E; ++I) {
- if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
- Mask[SubMask[I]] >= TermValue) {
- NewMask[I] = UndefMaskElem;
- continue;
- }
- NewMask[I] = Mask[SubMask[I]];
- }
- Mask.swap(NewMask);
- }
+ void addMask(ArrayRef<int> SubMask) { ::addMask(Mask, SubMask); }
Value *finalize(Value *V) {
IsFinalized = true;
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-- -S | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-- -mattr=avx512vl -S | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-- -S | FileCheck %s
+; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-- -mattr=avx512vl -S | FileCheck %s
declare void @use1(i1)
}
define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
-; SSE-LABEL: @logical_and_icmp_clamp_v8i32(
-; SSE-NEXT: [[X0:%.*]] = extractelement <8 x i32> [[X:%.*]], i32 0
-; SSE-NEXT: [[X1:%.*]] = extractelement <8 x i32> [[X]], i32 1
-; SSE-NEXT: [[X2:%.*]] = extractelement <8 x i32> [[X]], i32 2
-; SSE-NEXT: [[X3:%.*]] = extractelement <8 x i32> [[X]], i32 3
-; SSE-NEXT: [[Y0:%.*]] = extractelement <8 x i32> [[Y:%.*]], i32 0
-; SSE-NEXT: [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1
-; SSE-NEXT: [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2
-; SSE-NEXT: [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3
-; SSE-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[X0]], i32 0
-; SSE-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[X1]], i32 1
-; SSE-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[X2]], i32 2
-; SSE-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[X3]], i32 3
-; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4
-; SSE-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y1]], i32 5
-; SSE-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[Y2]], i32 6
-; SSE-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[Y3]], i32 7
-; SSE-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], [[TMP8]]
-; SSE-NEXT: [[TMP10:%.*]] = freeze <8 x i1> [[TMP9]]
-; SSE-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP10]])
-; SSE-NEXT: ret i1 [[TMP11]]
-;
-; AVX512-LABEL: @logical_and_icmp_clamp_v8i32(
-; AVX512-NEXT: [[X0:%.*]] = extractelement <8 x i32> [[X:%.*]], i32 0
-; AVX512-NEXT: [[X1:%.*]] = extractelement <8 x i32> [[X]], i32 1
-; AVX512-NEXT: [[X2:%.*]] = extractelement <8 x i32> [[X]], i32 2
-; AVX512-NEXT: [[X3:%.*]] = extractelement <8 x i32> [[X]], i32 3
-; AVX512-NEXT: [[Y0:%.*]] = extractelement <8 x i32> [[Y:%.*]], i32 0
-; AVX512-NEXT: [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1
-; AVX512-NEXT: [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2
-; AVX512-NEXT: [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3
-; AVX512-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X0]], i32 0
-; AVX512-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X1]], i32 1
-; AVX512-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X2]], i32 2
-; AVX512-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X3]], i32 3
-; AVX512-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 42>
-; AVX512-NEXT: [[D0:%.*]] = icmp slt i32 [[X0]], [[Y0]]
-; AVX512-NEXT: [[D1:%.*]] = icmp slt i32 [[X1]], [[Y1]]
-; AVX512-NEXT: [[D2:%.*]] = icmp slt i32 [[X2]], [[Y2]]
-; AVX512-NEXT: [[D3:%.*]] = icmp slt i32 [[X3]], [[Y3]]
-; AVX512-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
-; AVX512-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
-; AVX512-NEXT: [[S4:%.*]] = select i1 [[TMP7]], i1 [[D0]], i1 false
-; AVX512-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
-; AVX512-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
-; AVX512-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
-; AVX512-NEXT: ret i1 [[S7]]
+; CHECK-LABEL: @logical_and_icmp_clamp_v8i32(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <8 x i32> [[X:%.*]], i32 0
+; CHECK-NEXT: [[X1:%.*]] = extractelement <8 x i32> [[X]], i32 1
+; CHECK-NEXT: [[X2:%.*]] = extractelement <8 x i32> [[X]], i32 2
+; CHECK-NEXT: [[X3:%.*]] = extractelement <8 x i32> [[X]], i32 3
+; CHECK-NEXT: [[Y0:%.*]] = extractelement <8 x i32> [[Y:%.*]], i32 0
+; CHECK-NEXT: [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1
+; CHECK-NEXT: [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2
+; CHECK-NEXT: [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X1]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X2]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X3]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT: [[D0:%.*]] = icmp slt i32 [[X0]], [[Y0]]
+; CHECK-NEXT: [[D1:%.*]] = icmp slt i32 [[X1]], [[Y1]]
+; CHECK-NEXT: [[D2:%.*]] = icmp slt i32 [[X2]], [[Y2]]
+; CHECK-NEXT: [[D3:%.*]] = icmp slt i32 [[X3]], [[Y3]]
+; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[S4:%.*]] = select i1 [[TMP7]], i1 [[D0]], i1 false
+; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
+; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
+; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
+; CHECK-NEXT: ret i1 [[S7]]
;
%x0 = extractelement <8 x i32> %x, i32 0
%x1 = extractelement <8 x i32> %x, i32 1