/// Vectorize a single entry in the tree.
Value *vectorizeTree(TreeEntry *E);
- /// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
- /// \p E.
- Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx);
+ /// Vectorize the scalars in \p VL: reuse a matching tree entry, else gather.
+ Value *vectorizeTree(ArrayRef<Value *> VL);
/// Create a new vector from a list of scalar values. Produces a sequence
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
- Value *createBuildVector(const TreeEntry *E);
+ Value *createBuildVector(ArrayRef<Value *> VL);
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars. If \p
return IsSame(Scalars, ReuseShuffleIndices);
}
- bool isOperandGatherNode(const EdgeInfo &UserEI) const {
- return State == TreeEntry::NeedToGather &&
- UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx &&
- UserTreeIndices.front().UserTE == UserEI.UserTE;
- }
-
/// \returns true if current entry has same operands as \p TE.
bool hasEqualOperands(const TreeEntry &TE) const {
if (TE.getNumOperands() != getNumOperands())
}
void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef<int> Mask) const {
- // Reorder reuses mask.
- reorderReuses(TE.ReuseShuffleIndices, Mask);
+ // For vectorized and non-clustered reused - just reorder reuses mask.
const unsigned Sz = TE.Scalars.size();
- // For vectorized and non-clustered reused no need to do anything else.
- if (TE.State != TreeEntry::NeedToGather ||
+ if (TE.State != TreeEntry::NeedToGather || !TE.ReorderIndices.empty() ||
!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices,
Sz) ||
- !isRepeatedNonIdentityClusteredMask(TE.ReuseShuffleIndices, Sz))
+ !isRepeatedNonIdentityClusteredMask(TE.ReuseShuffleIndices, Sz)) {
+ reorderReuses(TE.ReuseShuffleIndices, Mask);
return;
- SmallVector<int> NewMask;
- inversePermutation(TE.ReorderIndices, NewMask);
- addMask(NewMask, TE.ReuseShuffleIndices);
- // Clear reorder since it is going to be applied to the new mask.
- TE.ReorderIndices.clear();
+ }
// Try to improve gathered nodes with clustered reuses, if possible.
- reorderScalars(TE.Scalars, makeArrayRef(NewMask).slice(0, Sz));
+ reorderScalars(TE.Scalars, makeArrayRef(TE.ReuseShuffleIndices).slice(0, Sz));
// Fill the reuses mask with the identity submasks.
for (auto *It = TE.ReuseShuffleIndices.begin(),
*End = TE.ReuseShuffleIndices.end();
};
} // namespace
-Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
- ArrayRef<Value *> VL = E->getOperand(NodeIdx);
+Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
const unsigned VF = VL.size();
InstructionsState S = getSameOpcode(VL, *TLI);
// Special processing for GEPs bundle, which may include non-gep values.
S = getSameOpcode(*It, *TLI);
}
if (S.getOpcode()) {
- if (TreeEntry *VE = getTreeEntry(S.OpValue); VE && VE->isSame(VL)) {
- assert((any_of(VE->UserTreeIndices,
- [E, NodeIdx](const EdgeInfo &EI) {
- return EI.EdgeIdx == NodeIdx && EI.UserTE == E;
- }) ||
- any_of(VectorizableTree,
- [E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
- return TE->isOperandGatherNode({E, NodeIdx}) &&
- VE->isSame(TE->Scalars);
- })) &&
- "Expected same vectorizable node.");
- Value *V = vectorizeTree(VE);
- if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
- if (!VE->ReuseShuffleIndices.empty()) {
- // Reshuffle to get only unique values.
- // If some of the scalars are duplicated in the vectorization
- // tree entry, we do not vectorize them but instead generate a
- // mask for the reuses. But if there are several users of the
- // same entry, they may have different vectorization factors.
- // This is especially important for PHI nodes. In this case, we
- // need to adapt the resulting instruction for the user
- // vectorization factor and have to reshuffle it again to take
- // only unique elements of the vector. Without this code the
- // function incorrectly returns reduced vector instruction with
- // the same elements, not with the unique ones.
-
- // block:
- // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
- // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
- // ... (use %2)
- // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
- // br %block
- SmallVector<int> UniqueIdxs(VF, UndefMaskElem);
- SmallSet<int, 4> UsedIdxs;
- int Pos = 0;
- for (int Idx : VE->ReuseShuffleIndices) {
- if (Idx != static_cast<int>(VF) && Idx != UndefMaskElem &&
- UsedIdxs.insert(Idx).second)
- UniqueIdxs[Idx] = Pos;
- ++Pos;
+ if (TreeEntry *E = getTreeEntry(S.OpValue))
+ if (E->isSame(VL)) {
+ Value *V = vectorizeTree(E);
+ if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
+ if (!E->ReuseShuffleIndices.empty()) {
+ // Reshuffle to get only unique values.
+ // If some of the scalars are duplicated in the vectorization tree
+ // entry, we do not vectorize them but instead generate a mask for
+ // the reuses. But if there are several users of the same entry,
+ // they may have different vectorization factors. This is especially
+ // important for PHI nodes. In this case, we need to adapt the
+ // resulting instruction for the user vectorization factor and have
+ // to reshuffle it again to take only unique elements of the vector.
+ // Without this code the function incorrectly returns reduced vector
+ // instruction with the same elements, not with the unique ones.
+
+ // block:
+ // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
+ // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
+ // ... (use %2)
+ // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
+ // br %block
+ SmallVector<int> UniqueIdxs(VF, UndefMaskElem);
+ SmallSet<int, 4> UsedIdxs;
+ int Pos = 0;
+ int Sz = VL.size();
+ for (int Idx : E->ReuseShuffleIndices) {
+ if (Idx != Sz && Idx != UndefMaskElem &&
+ UsedIdxs.insert(Idx).second)
+ UniqueIdxs[Idx] = Pos;
+ ++Pos;
+ }
+ assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
+ "less than original vector size.");
+ UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem);
+ V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
+ } else {
+ assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
+ "Expected vectorization factor less "
+ "than original vector size.");
+ SmallVector<int> UniformMask(VF, 0);
+ std::iota(UniformMask.begin(), UniformMask.end(), 0);
+ V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
+ }
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ GatherShuffleExtractSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
}
- assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
- "less than original vector size.");
- UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem);
- V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
- } else {
- assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
- "Expected vectorization factor less "
- "than original vector size.");
- SmallVector<int> UniformMask(VF, 0);
- std::iota(UniformMask.begin(), UniformMask.end(), 0);
- V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
- }
- if (auto *I = dyn_cast<Instruction>(V)) {
- GatherShuffleExtractSeq.insert(I);
- CSEBlocks.insert(I->getParent());
}
+ return V;
}
- return V;
- }
}
- // Find the corresponding gather entry and vectorize it.
- // Allows to be more accurate with tree/graph transformations, checks for the
- // correctness of the transformations in many cases.
- auto *I = find_if(VectorizableTree,
- [E, NodeIdx](const std::unique_ptr<TreeEntry> &TE) {
- return TE->isOperandGatherNode({E, NodeIdx});
- });
- assert(I != VectorizableTree.end() && "Gather node is not in the graph.");
- assert(I->get()->UserTreeIndices.size() == 1 &&
- "Expected only single user for the gather node.");
- assert(I->get()->isSame(VL) && "Expected same list of scalars.");
- return vectorizeTree(I->get());
+ // Can't vectorize this, so simply build a new vector with each lane
+ // corresponding to the requested value.
+ return createBuildVector(VL);
}
-
-Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
- assert(E->State == TreeEntry::NeedToGather && "Expected gather node.");
- unsigned VF = E->getVectorFactor();
-
- ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq,
- CSEBlocks);
- SmallVector<Value *> Gathered(
- VF, PoisonValue::get(E->Scalars.front()->getType()));
- bool NeedFreeze = false;
- SmallVector<Value *> VL(E->Scalars.begin(), E->Scalars.end());
- // Build a mask out of the redorder indices and reorder scalars per this mask.
- SmallVector<int> ReorderMask;
- inversePermutation(E->ReorderIndices, ReorderMask);
- if (!ReorderMask.empty())
- reorderScalars(VL, ReorderMask);
- if (!allConstant(VL)) {
- // For splats with can emit broadcasts instead of gathers, so try to find
- // such sequences.
- bool IsSplat = isSplat(VL) && (VL.size() > 2 || VL.front() == VL.back());
- SmallVector<int> ReuseMask(VF, UndefMaskElem);
- SmallVector<int> UndefPos;
+Value *BoUpSLP::createBuildVector(ArrayRef<Value *> VL) {
+ assert(any_of(VectorizableTree,
+ [VL](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->State == TreeEntry::NeedToGather && TE->isSame(VL);
+ }) &&
+ "Non-matching gather node.");
+ unsigned VF = VL.size();
+ // Exploit reuse across lanes: gather unique values once, then shuffle them.
+ SmallVector<int> ReuseShuffleIndicies;
+ SmallVector<Value *> UniqueValues;
+ if (VL.size() > 2) { // Shorter lists are gathered as they are.
 DenseMap<Value *, unsigned> UniquePositions;
- // Gather unique non-const values and all constant values.
- // For repeated values, just shuffle them.
- for (auto [I, V] : enumerate(VL)) {
+ unsigned NumValues =
+ std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) {
+ return !isa<UndefValue>(V);
+ }).base()); // Length of VL without its trailing undefs.
+ VF = std::max<unsigned>(VF, PowerOf2Ceil(NumValues));
+ int UniqueVals = 0;
+ for (Value *V : VL.drop_back(VL.size() - VF)) {
 if (isa<UndefValue>(V)) {
- if (!isa<PoisonValue>(V)) {
- Gathered[I] = V;
- ReuseMask[I] = I;
- UndefPos.push_back(I);
- }
+ ReuseShuffleIndicies.emplace_back(UndefMaskElem);
 continue;
 }
 if (isConstant(V)) {
- Gathered[I] = V;
- ReuseMask[I] = I;
+ ReuseShuffleIndicies.emplace_back(UniqueValues.size());
+ UniqueValues.emplace_back(V);
 continue;
 }
- if (IsSplat) {
- Gathered.front() = V;
- ReuseMask[I] = 0;
- } else {
- const auto Res = UniquePositions.try_emplace(V, I);
- Gathered[Res.first->second] = V;
- ReuseMask[I] = Res.first->second;
- }
- }
- if (!UndefPos.empty() && IsSplat) {
- // For undef values, try to replace them with the simple broadcast.
- // We can do it if the broadcasted value is guaranteed to be
- // non-poisonous, or by freezing the incoming scalar value first.
- auto *It = find_if(Gathered, [this, E](Value *V) {
- return !isa<UndefValue>(V) &&
- (getTreeEntry(V) || isGuaranteedNotToBePoison(V) ||
- any_of(V->uses(), [E](const Use &U) {
- // Check if the value already used in the same operation in
- // one of the nodes already.
- return E->UserTreeIndices.size() == 1 &&
- is_contained(
- E->UserTreeIndices.front().UserTE->Scalars,
- U.getUser()) &&
- E->UserTreeIndices.front().EdgeIdx != U.getOperandNo();
- }));
- });
- if (It != Gathered.end()) {
- // Replace undefs by the non-poisoned scalars and emit broadcast.
- int Pos = std::distance(Gathered.begin(), It);
- for_each(UndefPos, [&](int I) {
- // Set the undef position to the non-poisoned scalar.
- ReuseMask[I] = Pos;
- // Replace the undef by the poison, in the mask it is replaced by non-poisoned scalar already.
- if (I != Pos)
- Gathered[I] = PoisonValue::get(Gathered[I]->getType());
- });
- } else {
- // Replace undefs by the poisons, emit broadcast and then emit
- // freeze.
- for_each(UndefPos, [&](int I) {
- ReuseMask[I] = UndefMaskElem;
- if (isa<UndefValue>(Gathered[I]))
- Gathered[I] = PoisonValue::get(Gathered[I]->getType());
- });
- NeedFreeze = true;
+ auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
+ ReuseShuffleIndicies.emplace_back(Res.first->second);
+ if (Res.second) {
+ UniqueValues.emplace_back(V);
+ ++UniqueVals;
 }
 }
- ShuffleBuilder.addMask(ReuseMask);
- } else {
- copy(VL, Gathered.begin());
- }
- // Gather unique scalars and all constants.
- Value *Vec = gather(Gathered);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
- Vec = ShuffleBuilder.finalize(Vec);
- if (NeedFreeze)
- Vec = Builder.CreateFreeze(Vec);
+ if (UniqueVals == 1 && UniqueValues.size() == 1) {
+ // Emit pure splat vector: one non-constant scalar, other lanes are undef.
+ ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
+ UndefMaskElem);
+ } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
+ if (UniqueValues.empty()) {
+ assert(all_of(VL, UndefValue::classof) && "Expected list of undefs.");
+ NumValues = VF;
+ }
+ ReuseShuffleIndicies.clear(); // No shuffle; gather the scalars directly.
+ UniqueValues.clear();
+ UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues));
+ }
+ UniqueValues.append(VF - UniqueValues.size(),
+ PoisonValue::get(VL[0]->getType())); // Pad to VF lanes.
+ VL = UniqueValues;
+ }
+
+ ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq,
+ CSEBlocks);
+ Value *Vec = gather(VL);
+ if (!ReuseShuffleIndicies.empty()) {
+ ShuffleBuilder.addMask(ReuseShuffleIndicies);
+ Vec = ShuffleBuilder.finalize(Vec);
+ }
 return Vec;
 }
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq,
CSEBlocks);
if (E->State == TreeEntry::NeedToGather) {
- if (E->Idx > 0) {
- // We are in the middle of a vectorizable chain. We need to gather the
- // scalars from the users.
- Value *Vec = createBuildVector(E);
- E->VectorizedValue = Vec;
- return Vec;
- }
if (E->getMainOp())
setInsertPointAfterBundle(E);
Value *Vec;
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- Value *Vec = vectorizeOperand(E, i);
+ Value *Vec = vectorizeTree(E->getOperand(i));
NewPhi->addIncoming(Vec, IBB);
}
case Instruction::InsertElement: {
assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique");
Builder.SetInsertPoint(cast<Instruction>(E->Scalars.back()));
- Value *V = vectorizeOperand(E, 1);
+ Value *V = vectorizeTree(E->getOperand(1));
// Create InsertVector shuffle if necessary
auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
case Instruction::BitCast: {
setInsertPointAfterBundle(E);
- Value *InVec = vectorizeOperand(E, 0);
+ Value *InVec = vectorizeTree(E->getOperand(0));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
case Instruction::ICmp: {
setInsertPointAfterBundle(E);
- Value *L = vectorizeOperand(E, 0);
- Value *R = vectorizeOperand(E, 1);
+ Value *L = vectorizeTree(E->getOperand(0));
+ Value *R = vectorizeTree(E->getOperand(1));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
case Instruction::Select: {
setInsertPointAfterBundle(E);
- Value *Cond = vectorizeOperand(E, 0);
- Value *True = vectorizeOperand(E, 1);
- Value *False = vectorizeOperand(E, 2);
+ Value *Cond = vectorizeTree(E->getOperand(0));
+ Value *True = vectorizeTree(E->getOperand(1));
+ Value *False = vectorizeTree(E->getOperand(2));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
case Instruction::FNeg: {
setInsertPointAfterBundle(E);
- Value *Op = vectorizeOperand(E, 0);
+ Value *Op = vectorizeTree(E->getOperand(0));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
case Instruction::Xor: {
setInsertPointAfterBundle(E);
- Value *LHS = vectorizeOperand(E, 0);
- Value *RHS = vectorizeOperand(E, 1);
+ Value *LHS = vectorizeTree(E->getOperand(0));
+ Value *RHS = vectorizeTree(E->getOperand(1));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
}
} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
- Value *VecPtr = vectorizeOperand(E, 0);
+ Value *VecPtr = vectorizeTree(E->getOperand(0));
// Use the minimum alignment of the gathered loads.
Align CommonAlignment = LI->getAlign();
for (Value *V : E->Scalars)
setInsertPointAfterBundle(E);
- Value *VecValue = vectorizeOperand(E, 0);
+ Value *VecValue = vectorizeTree(E->getOperand(0));
ShuffleBuilder.addMask(E->ReorderIndices);
VecValue = ShuffleBuilder.finalize(VecValue);
auto *GEP0 = cast<GetElementPtrInst>(VL0);
setInsertPointAfterBundle(E);
- Value *Op0 = vectorizeOperand(E, 0);
+ Value *Op0 = vectorizeTree(E->getOperand(0));
SmallVector<Value *> OpVecs;
for (int J = 1, N = GEP0->getNumOperands(); J < N; ++J) {
- Value *OpVec = vectorizeOperand(E, J);
+ Value *OpVec = vectorizeTree(E->getOperand(J));
OpVecs.push_back(OpVec);
}
continue;
}
- Value *OpVec = vectorizeOperand(E, j);
+ Value *OpVec = vectorizeTree(E->getOperand(j));
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j))
Value *LHS = nullptr, *RHS = nullptr;
if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
setInsertPointAfterBundle(E);
- LHS = vectorizeOperand(E, 0);
- RHS = vectorizeOperand(E, 1);
+ LHS = vectorizeTree(E->getOperand(0));
+ RHS = vectorizeTree(E->getOperand(1));
} else {
setInsertPointAfterBundle(E);
- LHS = vectorizeOperand(E, 0);
+ LHS = vectorizeTree(E->getOperand(0));
}
if (E->VectorizedValue) {
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[ARRAYIDX3_I]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TEMP]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX7_I]] to <2 x double>*
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TEMP2]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[OUT:%.*]] to <2 x double>*
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TEMP]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX7_I]] to <2 x double>*
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[TEMP2]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[TEMP2]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[OUT:%.*]] to <2 x double>*
; CHECK-NEXT: [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[ARRAYIDX25_I]] to <2 x double>*
-; CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[TMP11]], align 8
-; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[ARRAYIDX30_I]] to <2 x double>*
-; CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[TMP14]], align 8
-; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[TMP13]], [[TMP16]]
-; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast double* [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[TMP18]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[ARRAYIDX25_I]] to <2 x double>*
+; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, <2 x double>* [[TMP13]], align 8
+; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x double> [[TMP4]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[ARRAYIDX30_I]] to <2 x double>*
+; CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[TMP16]], align 8
+; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP9]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> [[TMP15]], [[TMP18]]
+; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast double* [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP19]], <2 x double>* [[TMP20]], align 8
; CHECK-NEXT: [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0
-; CHECK-NEXT: [[SHUFFLE4:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE4]]
-; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> poison, double [[TEMP11]], i32 0
-; CHECK-NEXT: [[SHUFFLE5:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE5]]
-; CHECK-NEXT: [[TMP23:%.*]] = fadd <2 x double> [[TMP20]], [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast double* [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP23]], <2 x double>* [[TMP24]], align 8
-; CHECK-NEXT: [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 6
-; CHECK-NEXT: [[TMP25:%.*]] = fmul <2 x double> [[TMP12]], [[SHUFFLE4]]
-; CHECK-NEXT: [[TMP26:%.*]] = fmul <2 x double> [[TMP15]], [[SHUFFLE5]]
-; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = bitcast double* [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]] to <2 x double>*
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> [[TMP21]], double [[TEMP10]], i32 1
+; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[TMP2]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x double> poison, double [[TEMP11]], i32 0
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x double> [[TMP24]], double [[TEMP11]], i32 1
+; CHECK-NEXT: [[TMP26:%.*]] = fmul <2 x double> [[TMP7]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[TMP23]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = bitcast double* [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[TMP28]], align 8
+; CHECK-NEXT: [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 6
+; CHECK-NEXT: [[TMP29:%.*]] = fmul <2 x double> [[TMP14]], [[TMP22]]
+; CHECK-NEXT: [[TMP30:%.*]] = fmul <2 x double> [[TMP17]], [[TMP25]]
+; CHECK-NEXT: [[TMP31:%.*]] = fadd <2 x double> [[TMP29]], [[TMP30]]
+; CHECK-NEXT: [[TMP32:%.*]] = bitcast double* [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[TMP32]], align 8
; CHECK-NEXT: ret void
;
%arrayidx1.i = getelementptr inbounds [2 x double], [2 x double]* %A, i64 0, i64 0
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[SHUFFLE1]], [[TMP1]]
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
-; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void
;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE1]], [[TMP1]]
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul reassoc <2 x float> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP6:%.*]] = fsub reassoc <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP7:%.*]] = fadd reassoc <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
-; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fmul reassoc <2 x float> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = fsub reassoc <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd reassoc <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void
;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[SHUFFLE1]], [[TMP1]]
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
-; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void
;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE1]], [[TMP1]]
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
-; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void
;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan <2 x float> [[SHUFFLE1]], [[TMP1]]
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan <2 x float> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP6:%.*]] = fsub nnan <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP7:%.*]] = fadd nnan <2 x float> [[TMP5]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
-; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul nnan <2 x float> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan <2 x float> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = fsub nnan <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd nnan <2 x float> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void
;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
; CHECK-NEXT: [[SUB_I1096:%.*]] = fsub fast float 1.000000e+00, [[TMP0:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[SUB_I1096]], i32 0
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> [[SHUFFLE1]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <2 x float> [[SHUFFLE1]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[SUB_I1096]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[SUB_I1096]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[SHUFFLE]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x float> [[SHUFFLE]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[B:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]]
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[LD_2_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[LD_2_1]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE1]]
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP3]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>*
-; CHECK-NEXT: store <2 x float> [[TMP6]], <2 x float>* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[LD_2_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[LD_2_1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[LD_2_1]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>*
+; CHECK-NEXT: store <2 x float> [[TMP8]], <2 x float>* [[TMP9]], align 4
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_1]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i64> [[TMP1]], [[SHUFFLE1]]
-; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP3]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[LD_2_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[LD_2_1]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 4
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_1]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP1]], [[SHUFFLE1]]
-; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP3]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[LD_2_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[LD_2_1]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>*
+; CHECK-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP9]], align 4
; CHECK-NEXT: ret void
;
entry:
; CHECK-LABEL: @s116_modified(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
-; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, float* [[A]], i64 4
+; CHECK-NEXT: [[LD1:%.*]] = load float, float* [[GEP1]], align 4
; CHECK-NEXT: [[LD0:%.*]] = load float, float* [[GEP0]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>*
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP2]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>*
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP4]], <4 x i32> <i32 0, i32 undef, i32 1, i32 2>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <4 x float> [[TMP9]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP12]], align 4
+; CHECK-NEXT: [[LD4:%.*]] = load float, float* [[GEP4]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[LD4]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[LD1]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[LD1]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP11]], align 4
; CHECK-NEXT: ret void
;
%gep0 = getelementptr inbounds float, float* %a, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[SHUFFLE]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[PTR]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[X]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[PTR]] to <2 x i32>*
+; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 2
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[SHUFFLE]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP5]], align 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[X]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 2
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V3_LANE_1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: call void @use(double [[V1_LANE_0]])
; CHECK-NEXT: call void @use(double [[V3_LANE_1]])
-; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[PTR_1]], align 8
+; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[PTR_1]], align 8
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_2]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_3]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: call void @use(double [[V1_LANE_2]])
; CHECK-NEXT: call void @use(double [[V1_LANE_3]])
-; CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[PTR_1]], align 8
+; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[PTR_1]], align 8
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 0
-; CHECK-NEXT: call void @use(double [[TMP3]])
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V2_LANE_2]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[V_1]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 0
; CHECK-NEXT: call void @use(double [[TMP4]])
-; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[PTR_1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[V_1]], i32 1
+; CHECK-NEXT: call void @use(double [[TMP5]])
+; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[PTR_1]], align 8
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_1]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_2]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: call void @use(double [[V1_LANE_1]])
; CHECK-NEXT: call void @use(double [[V1_LANE_2]])
-; CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[PTR_1]], align 8
+; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[PTR_1]], align 8
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_3]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_1]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_0]], i32 3
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_2]], i32 2
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double [[V2_LANE_0]], i32 3
+; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP3]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: call void @use(double [[V1_LANE_0]])
; CHECK-NEXT: call void @use(double [[V1_LANE_1]])
; CHECK-NEXT: call void @use(double [[V1_LANE_2]])
; CHECK-NEXT: call void @use(double [[V1_LANE_3]])
-; CHECK-NEXT: store <9 x double> [[TMP8]], <9 x double>* [[PTR_1]], align 8
+; CHECK-NEXT: store <9 x double> [[TMP9]], <9 x double>* [[PTR_1]], align 8
; CHECK-NEXT: ret void
;
bb:
; GCN-NEXT: [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
; GCN-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
; GCN-NEXT: [[TMP3:%.*]] = insertelement <2 x half> poison, half [[SCALAR:%.*]], i32 0
-; GCN-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x half> [[TMP3]], <2 x half> poison, <2 x i32> zeroinitializer
-; GCN-NEXT: [[TMP4:%.*]] = fmul <2 x half> [[TMP2]], [[SHUFFLE]]
-; GCN-NEXT: [[TMP5:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT: store <2 x half> [[TMP4]], <2 x half> addrspace(3)* [[TMP5]], align 2
+; GCN-NEXT: [[TMP4:%.*]] = insertelement <2 x half> [[TMP3]], half [[SCALAR]], i32 1
+; GCN-NEXT: [[TMP5:%.*]] = fmul <2 x half> [[TMP2]], [[TMP4]]
+; GCN-NEXT: [[TMP6:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
+; GCN-NEXT: store <2 x half> [[TMP5]], <2 x half> addrspace(3)* [[TMP6]], align 2
; GCN-NEXT: ret void
;
%i0 = load half, half addrspace(3)* %a, align 2
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[ARG:%.*]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP0]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = fptosi <2 x double> [[TMP6]] to <2 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = sext <2 x i32> [[TMP7]] to <2 x i64>
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i32 0
-; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1
-; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP10]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
+; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
+; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
+; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP11]], 1
; CHECK-NEXT: ret { i64, i64 } [[T17]]
;
bb:
; CHECK-LABEL: @Test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0:%.*]], i32 0
-; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[SHUFFLE7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> poison, i32 [[TMP0]], i32 0
-; CHECK-NEXT: [[SHUFFLE7:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[SHUFFLE6:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP14:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[SHUFFLE7]])
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[SHUFFLE8]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[SHUFFLE6]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[SHUFFLE7]])
; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP5]])
; CHECK-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[TMP8]]
; CHECK-NEXT: [[OP_RDX4:%.*]] = and i32 [[OP_RDX2]], [[OP_RDX3]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_RDX4]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
-; CHECK-NEXT: [[SHUFFLE6:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP9]], [[SHUFFLE6]]
-; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP9]], [[SHUFFLE6]]
-; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP9]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i32> [[TMP9]], [[TMP11]]
+; CHECK-NEXT: [[TMP14]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: br label [[LOOP]]
;
; FORCE_REDUCTION-LABEL: @Test(
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = load i16, i16* undef, align 2
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> <i16 poison, i16 0, i16 0, i16 poison, i16 poison, i16 0, i16 poison, i16 0>, i16 [[CALL37:%.*]], i32 3
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> <i16 poison, i16 0, i16 0, i16 poison, i16 0, i16 0, i16 poison, i16 poison>, i16 [[CALL37:%.*]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 5, i32 3, i32 7>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 4, i32 3, i32 5>
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[SHUFFLE]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <8 x i16> [[SHUFFLE]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A0]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0>
-; CHECK-NEXT: [[TMP1:%.*]] = freeze <8 x i32> [[SHUFFLE]]
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>*
-; CHECK-NEXT: store <8 x i32> [[TMP1]], <8 x i32>* [[TMP2]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>*
+; CHECK-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP1]], align 8
; CHECK-NEXT: ret void
;
entry:
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float [[A]], i32 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> zeroinitializer, [[SHUFFLE]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[LOOP]]
;
entry:
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x double> [[TMP28]], double [[TMP12]], i32 1
; CHECK-NEXT: [[TMP30:%.*]] = fsub <2 x double> [[TMP27]], [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x double> poison, double [[MUL88]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP31]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP32:%.*]] = fdiv <2 x double> [[TMP30]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP32]], i32 1
-; CHECK-NEXT: [[CMP93:%.*]] = fcmp olt double [[TMP33]], 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP32]], i32 0
-; CHECK-NEXT: [[CMP94:%.*]] = fcmp olt double [[TMP34]], 0x3EB0C6F7A0B5ED8D
+; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x double> [[TMP31]], double [[MUL88]], i32 1
+; CHECK-NEXT: [[TMP33:%.*]] = fdiv <2 x double> [[TMP30]], [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP33]], i32 1
+; CHECK-NEXT: [[CMP93:%.*]] = fcmp olt double [[TMP34]], 0x3EB0C6F7A0B5ED8D
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[TMP33]], i32 0
+; CHECK-NEXT: [[CMP94:%.*]] = fcmp olt double [[TMP35]], 0x3EB0C6F7A0B5ED8D
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP93]], i1 [[CMP94]], i1 false
; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP]], label [[LOR_LHS_FALSE:%.*]]
; CHECK: lor.lhs.false:
-; CHECK-NEXT: [[TMP35:%.*]] = fcmp ule <2 x double> [[TMP32]], <double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP35]], i32 0
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP35]], i32 1
-; CHECK-NEXT: [[OR_COND106:%.*]] = select i1 [[TMP37]], i1 true, i1 [[TMP36]]
+; CHECK-NEXT: [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1
+; CHECK-NEXT: [[OR_COND106:%.*]] = select i1 [[TMP38]], i1 true, i1 [[TMP37]]
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = zext i1 [[OR_COND106]] to i32
; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <2 x double> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[G:%.*]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> <double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[G]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[TMP4]], <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
; CHECK-NEXT: ret i32 undef
;
entry:
; AVX-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 1
; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2
-; AVX-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
-; AVX-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[SHUFFLE2]]
-; AVX-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16
+; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 3
+; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP3]], [[TMP6]]
+; AVX-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16
; AVX-NEXT: ret void
;
%add1 = add i32 %c, %a
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[CONV]], i32 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 7.000000e+00, double 4.000000e+00>
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], <double 5.000000e+00, double 9.000000e+00>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], <double 7.000000e+00, double 4.000000e+00>
+; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], <double 5.000000e+00, double 9.000000e+00>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
+; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]]
; CHECK-NEXT: br i1 [[CMP11]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK: if.then:
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP0]], [[SHUFFLE]]
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], [[SHUFFLE1]]
-; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x float> [[TMP5]], <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP5]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x float> [[TMP7]], <float -1.000000e+00, float -1.000000e+00>
-; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP10]], i32 1
-; CHECK-NEXT: [[ADD13]] = fadd float [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> <i32 1, i32 undef>
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[ADD13]], i32 1
-; CHECK-NEXT: [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP14]], <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x float> [[TMP14]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT: [[TMP17:%.*]] = fcmp olt <2 x float> [[TMP16]], <float -1.000000e+00, float -1.000000e+00>
-; CHECK-NEXT: [[TMP18]] = select <2 x i1> [[TMP17]], <2 x float> <float -1.000000e+00, float -1.000000e+00>, <2 x float> [[TMP16]]
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP0]], [[TMP3]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <2 x float> [[TMP6]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP6]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP9:%.*]] = fcmp olt <2 x float> [[TMP8]], <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x float> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP9]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
+; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1
+; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP18:%.*]] = fcmp olt <2 x float> [[TMP17]], <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT: [[TMP19]] = select <2 x i1> [[TMP18]], <2 x float> <float -1.000000e+00, float -1.000000e+00>, <2 x float> [[TMP17]]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-LABEL: @exceed(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]]
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP5]], undef
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
+; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef
; CHECK-NEXT: [[IXX0:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX1:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX2:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX3:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX4:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX5:%.*]] = fsub double undef, undef
-; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP5]], undef
+; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef
; CHECK-NEXT: [[IXX10:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX11:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX12:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX20:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX21:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP6]], [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]]
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP9]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
+; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]]
; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP4]], <2 x i32> <i32 3, i32 1>
-; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[TMP12]], undef
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef
; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [
; CHECK-NEXT: i32 0, label [[BB2:%.*]]
; CHECK-NEXT: ]
; CHECK: bb2:
; CHECK-NEXT: br label [[LABEL]]
; CHECK: label:
-; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x double> [ [[TMP10]], [[BB1]] ], [ [[TMP13]], [[BB2]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP15]], [[BB2]] ]
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[A]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[N]], 4
; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[N]], 4
; CHECK-NEXT: [[CONV12:%.*]] = sitofp i32 [[ADD]] to double
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>*
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV12]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>*
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV12]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: ret i32 0
; CHECK-LABEL: @cse_for_hoisted_instructions_in_preheader(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[A]], i32 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> <i32 22, i32 22>, [[SHUFFLE]]
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> <i32 22, i32 22>, [[TMP1]]
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], <i32 3, i32 3>
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[SHUFFLE]], <i32 3, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], <i32 3, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>*
+; CHECK-NEXT: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP1]], <i32 3, i32 3>
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 10
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_2]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[GEP_2]] to <2 x i32>*
+; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP7:%.*]], i32 2
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 6
-; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, [[TMP3]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 4
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 2, i32 3, i32 undef, i32 4, i32 undef>
+; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, [[SHUFFLE]]
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, [[SHUFFLE]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7>
; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> zeroinitializer, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64*> [[TMP1]], <2 x i64*> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, <2 x i64*> [[SHUFFLE]], <2 x i64> <i64 11, i64 56>
-; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint <2 x i64*> [[TMP2]] to <2 x i64>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64*> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP5]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 11, i64 56>
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP6]], align 8
; CHECK-NEXT: ret i32 undef
;
entry:
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64*> [[TMP1]], <2 x i64*> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, <2 x i64*> [[SHUFFLE]], <2 x i64> <i64 56, i64 11>
-; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint <2 x i64*> [[TMP2]] to <2 x i64>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64*> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP3]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP8]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 56, i64 11>
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 8
; CHECK-NEXT: ret void
;
entry:
; CHECK-LABEL: @multi_uses(
; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Y1]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X:%.*]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[Y1]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
; THRESH1-LABEL: @f_used_twice_in_tree(
; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
-; THRESH1-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; THRESH1-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]]
-; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
-; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
-; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]]
+; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
+; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]]
+; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH1-NEXT: ret float [[ADD]]
;
; THRESH2-LABEL: @f_used_twice_in_tree(
; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
-; THRESH2-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
-; THRESH2-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]]
-; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
-; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
-; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]]
+; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
+; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]]
+; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH2-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+00>, float [[TMP2]], i32 0
; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[CONV]], i32 0
-; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer
-; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast <2 x float> [[TMP3]], [[SHUFFLE]]
-; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
-; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1
-; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP6]], [[TMP7]]
+; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[CONV]], i32 1
+; THRESHOLD-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> [[TMP3]], [[TMP5]]
+; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
+; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
+; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP7]], [[TMP8]]
; THRESHOLD-NEXT: ret float [[OP_RDX2]]
;
entry:
; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[CONVC]], i32 1
; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[CONV]], i32 0
-; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> zeroinitializer
-; THRESHOLD-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> [[TMP4]], [[SHUFFLE]]
-; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
-; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
-; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP7]], [[TMP8]]
+; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[CONV]], i32 1
+; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP4]], [[TMP6]]
+; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
+; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
+; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], 3.000000e+00
; THRESHOLD-NEXT: ret float [[OP_RDX3]]
;
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[CONV]], i32 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 7.000000e+00, double 4.000000e+00>
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], <double 5.000000e+00, double 9.000000e+00>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
-; CHECK-NEXT: [[INTREEUSER:%.*]] = fadd double [[TMP7]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], <double 7.000000e+00, double 4.000000e+00>
+; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], <double 5.000000e+00, double 9.000000e+00>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
+; CHECK-NEXT: [[INTREEUSER:%.*]] = fadd double [[TMP8]], [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
+; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]]
; CHECK-NEXT: br i1 [[CMP11]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK: if.then:
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[X]] to <2 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 16
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> <float undef, float poison, float poison, float undef>, float [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[TMP5]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1
+; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[SHUFFLE1]]
; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], undef
; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], undef
; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], undef
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 poison>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SHUFFLE]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 ptrtoint (i32 ()* @fn1 to i32)>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
; CHECK-NEXT: ret i32 0
;
entry:
; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[LOADVEC]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[LOADVEC2]], [[SHUFFLE1]]
-; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[LOADA0]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[LOADVEC]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[LOADA1]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[LOADVEC2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
; CHECK-NEXT: ret void
;
%idx0 = getelementptr inbounds double, double* %array, i64 0
; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[EXTRA1]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0
-; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
-; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE]]
-; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0
-; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[EXTRB1]], i32 1
-; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0
-; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer
-; AVX-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE1]]
-; AVX-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]]
-; AVX-NEXT: [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
-; AVX-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
+; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[LOADA0]], i32 1
+; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
+; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0
+; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1
+; AVX-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0
+; AVX-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[LOADA1]], i32 1
+; AVX-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP7]], [[TMP9]]
+; AVX-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]]
+; AVX-NEXT: [[TMP12:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
+; AVX-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8
; AVX-NEXT: ret void
;
%idx0 = getelementptr inbounds double, double* %array, i64 0
; AVX-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
-; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
-; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
-; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]]
-; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
-; AVX-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
-; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]]
+; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1
+; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
+; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1
+; AVX-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]]
+; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]]
+; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+; AVX-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
+; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP9]], [[TMP10]]
; AVX-NEXT: ret double [[ADD3]]
;
entry:
; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
-; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE1]]
-; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
-; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
-; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]]
+; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
+; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]]
+; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
+; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]]
; SSE-NEXT: ret double [[RES]]
;
; AVX-LABEL: @splat_loads_with_internal_uses(
; AVX-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
-; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
-; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
-; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]]
-; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
-; AVX-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE]]
-; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
-; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
-; AVX-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]]
+; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1
+; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
+; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1
+; AVX-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]]
+; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]]
+; AVX-NEXT: [[TMP9:%.*]] = fsub <2 x double> [[TMP8]], [[TMP3]]
+; AVX-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0
+; AVX-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP9]], i32 1
+; AVX-NEXT: [[RES:%.*]] = fadd double [[TMP10]], [[TMP11]]
; AVX-NEXT: ret double [[RES]]
;
entry:
; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef
; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef
; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, i32 [[SUB102_1]], i32 4
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 5
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 6
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 7
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 9
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 undef, i32 undef, i32 poison>, i32 [[SUB86_1]], i32 4
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 5
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 6
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 7
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 12
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12>
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_1]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 4
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 2
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 3
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 4
+; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 4>
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
; CHECK-NEXT: [[ICMP_A1:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @b to <2 x i64>*), align 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i1> poison, i1 [[ICMP_A1]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[SHUFFLE]], <2 x i64> [[TMP2]], <2 x i64> [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i1> [[TMP3]], i1 [[ICMP_A1]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i64> [[TMP2]], <2 x i64> [[TMP0]]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
-; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_BODY_LR_PH]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @c to <2 x i64>*), align 8
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; CHECK-NEXT: [[ICMP_D0:%.*]] = icmp eq i64 [[TMP7]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i64> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP5]], [[WHILE_BODY_LR_PH]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @c to <2 x i64>*), align 8
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
+; CHECK-NEXT: [[ICMP_D0:%.*]] = icmp eq i64 [[TMP8]], 0
; CHECK-NEXT: br i1 [[ICMP_D0]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
-; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP7]], 8
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP5]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP10:%.*]] = and <2 x i64> [[TMP9]], [[TMP6]]
-; CHECK-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8
+; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]]
+; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], <i32 0, i32 -1, i32 -5, i32 -9>
-; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i32> [[TMP0]]
-; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], undef
-; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP4]], <4 x i32> undef
-; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i32> [[TMP6]] to <4 x i64>
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP10]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP12]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2
-; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
-; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
-; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
-; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP16]]
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], undef
+; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> undef
+; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
+; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2
+; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
+; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3
+; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]]
; CHECK-NEXT: unreachable
;
entry:
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 undef, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 0, i8 undef>, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 0, i8 poison>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG2:%.*]], i32 3
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = fdiv <2 x double> [[TMP4]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[TMP7]], 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
-; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[MUL]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
+; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[TMP9]], 0x3EB0C6F7A0B5ED8D
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
; CHECK: lor.lhs.false:
-; CHECK-NEXT: [[TMP9:%.*]] = fcmp ule <2 x double> [[TMP6]], <double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
-; CHECK-NEXT: [[NOT_OR_COND9:%.*]] = or i1 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
+; CHECK-NEXT: [[NOT_OR_COND9:%.*]] = or i1 [[TMP11]], [[TMP12]]
; CHECK-NEXT: ret i1 [[NOT_OR_COND9]]
; CHECK: cleanup:
; CHECK-NEXT: ret i1 false
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP5]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP8:%.*]] = fcmp uge <2 x double> [[TMP7]], <double 0x3EB0C6F7A0B5ED8D, double 0x3EB0C6F7A0B5ED8D>
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: [[NOT_OR_COND:%.*]] = or i1 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[MUL]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = fdiv <2 x double> [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], <double 0x3EB0C6F7A0B5ED8D, double 0x3EB0C6F7A0B5ED8D>
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
+; CHECK-NEXT: [[NOT_OR_COND:%.*]] = or i1 [[TMP10]], [[TMP11]]
; CHECK-NEXT: ret i1 [[NOT_OR_COND]]
;
%fneg = fneg double %b
; CHECK-NEXT: br label [[T:%.*]]
; CHECK: t:
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = add i64 256, 0
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP17:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP20:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP19:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX:%.*]], %struct.complex* [[A:%.*]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[B:%.*]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP4]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[TMP8]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = fmul <2 x float> [[TMP9]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP9]], [[SHUFFLE1]]
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP13]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x float> [[TMP11]], [[SHUFFLE2]]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> [[TMP15]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP17]] = fadd <2 x float> [[TMP2]], [[TMP16]]
-; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[TMP0]]
-; CHECK-NEXT: br i1 [[TMP19]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x float> [[TMP9]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x float> [[TMP9]], [[TMP14]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP15]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP16:%.*]] = fsub <2 x float> [[TMP12]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x float> [[TMP12]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> [[TMP17]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP19]] = fadd <2 x float> [[TMP2]], [[TMP18]]
+; CHECK-NEXT: [[TMP20]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP20]], [[TMP0]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[RESULT:%.*]], i32 0, i32 0
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <2 x float>*
-; CHECK-NEXT: store <2 x float> [[TMP17]], <2 x float>* [[TMP21]], align 4
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[RESULT:%.*]], i32 0, i32 0
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP22]] to <2 x float>*
+; CHECK-NEXT: store <2 x float> [[TMP19]], <2 x float>* [[TMP23]], align 4
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 2.200000e+00, double 1.100000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 2.200000e+00, double 1.100000e+00>
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 2.200000e+00, double 1.100000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 2.200000e+00, double 1.100000e+00>
; CHECK-NEXT: [[PTRA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRA1]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PTRA1]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP2]], <double 4.400000e+00, double 3.300000e+00>
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], <double 4.400000e+00, double 3.300000e+00>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP7]], [[TMP6]]
; CHECK-NEXT: ret void
;
bb1:
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[SHUFFLE]], <double 1.100000e+00, double 1.200000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 1.100000e+00, double 1.200000e+00>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.100000e+00, double 2.200000e+00>
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.100000e+00, double 3.200000e+00>
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], <double 1.100000e+00, double 1.200000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], <double 1.100000e+00, double 1.200000e+00>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP4]], <double 2.100000e+00, double 2.200000e+00>
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], <double 3.100000e+00, double 3.200000e+00>
; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PTRA0]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[SHUFFLE1]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[PTRA0]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[SHUFFLE]], <2 x double>* [[TMP7]], align 8
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], <double 4.100000e+00, double 4.200000e+00>
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
-; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP6]], <double 4.100000e+00, double 4.200000e+00>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
+; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP9]], [[TMP10]]
; CHECK-NEXT: ret void
;
bb1:
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 1.200000e+00, double 1.100000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[SHUFFLE]], <double 1.200000e+00, double 1.100000e+00>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.200000e+00, double 2.100000e+00>
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.200000e+00, double 3.100000e+00>
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 1.200000e+00, double 1.100000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], <double 1.200000e+00, double 1.100000e+00>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP4]], <double 2.200000e+00, double 2.100000e+00>
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], <double 3.200000e+00, double 3.100000e+00>
; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PTRA0]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[PTRA0]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], <double 4.200000e+00, double 4.100000e+00>
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
-; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP6]], <double 4.200000e+00, double 4.100000e+00>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
+; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP10]], [[TMP9]]
; CHECK-NEXT: ret void
;
bb1:
; CHECK-NEXT: for.cond.preheader:
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]]
; CHECK: for.inc.preheader:
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 undef>, i32 [[TMP0:%.*]], i32 6
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison>, i32 [[TMP0:%.*]], i32 6
; CHECK-NEXT: br i1 false, label [[FOR_END]], label [[L1_PREHEADER:%.*]]
; CHECK: for.end:
; CHECK-NEXT: [[DOTPR:%.*]] = phi i32 [ 0, [[FOR_INC_PREHEADER]] ], [ 0, [[FOR_COND_PREHEADER:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[DOTPR]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0>
; CHECK-NEXT: br label [[L1_PREHEADER]]
; CHECK: L1.preheader:
; CHECK-NEXT: [[TMP3:%.*]] = phi <8 x i32> [ [[SHUFFLE]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ]
; CHECK: else:
; CHECK-NEXT: [[ARG_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[ARG]], i32 0
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <2 x ptr> [[SHUFFLE1]], <2 x i64> <i64 32, i64 24>
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 undef, i32 3>
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[ARG_1]], i32 2
-; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP9]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[ARG]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> <i64 32, i64 24>
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> [[TMP8]], <4 x i32> <i32 4, i32 5, i32 undef, i32 3>
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr> [[TMP9]], ptr [[ARG_1]], i32 2
+; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP10]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
-; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP10]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP11]], [[ELSE]] ]
; CHECK-NEXT: ret void
;
br i1 %c, label %if, label %else
; CHECK-NEXT: [[SUB:%.*]] = fsub float 6.553500e+04, undef
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float poison, float poison, float undef, float undef>, float [[SUB]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[SUB]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2: