From 8b1eeafb91331c85570a172904460918c9785f29 Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Sun, 29 Sep 2019 14:18:06 +0000
Subject: [PATCH] [SLP] Fix for PR31847: Assertion failed:
 (isLoopInvariant(Operands[i], L) && "SCEVAddRecExpr operand is not
 loop-invariant!")

Initially the SLP vectorizer replaced all going-to-be-vectorized
instructions with Undef values. This may break ScalarEvolution and may
cause a crash.
Reworked the SLP vectorizer so that it no longer replaces vectorized
instructions with UndefValue. Instead, vectorized instructions are
marked for deletion inside the BoUpSLP class and deleted upon class
destruction.

Reviewers: mzolotukhin, mkuper, hfinkel, RKSimon, davide, spatel

Subscribers: RKSimon, Gerolf, anemet, hans, majnemer, llvm-commits, sanjoy

Differential Revision: https://reviews.llvm.org/D29641

llvm-svn: 373166
---
 .../llvm/Transforms/Vectorize/SLPVectorizer.h      |   9 +-
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp    | 152 +++---
 .../SLPVectorizer/AArch64/gather-root.ll           | 102 ++--
 .../Transforms/SLPVectorizer/AArch64/horizontal.ll |  16 -
 .../SLPVectorizer/AArch64/spillcost-di.ll          |   4 +-
 llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll  | 153 ++++++
 .../test/Transforms/SLPVectorizer/X86/PR35628_1.ll |  13 +-
 .../test/Transforms/SLPVectorizer/X86/PR35628_2.ll |   5 -
 llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll  |  72 +--
 llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll  |  16 -
 .../Transforms/SLPVectorizer/X86/bad-reduction.ll  |  28 --
 .../SLPVectorizer/X86/horizontal-list.ll           | 354 +-------------
 .../SLPVectorizer/X86/horizontal-minmax.ll         | 476 ++++--------------
 .../Transforms/SLPVectorizer/X86/horizontal.ll     | 148 ------
 .../Transforms/SLPVectorizer/X86/long_chains.ll    |   8 +-
 .../SLPVectorizer/X86/reassociated-loads.ll        |  31 --
 .../SLPVectorizer/X86/reduction_loads.ll           |  24 -
 .../SLPVectorizer/X86/reduction_unrolled.ll        |  35 +-
 .../Transforms/SLPVectorizer/X86/remark_horcost.ll |   4 -
 .../SLPVectorizer/X86/reorder_repeated_ops.ll      |  22 -
 .../Transforms/SLPVectorizer/X86/undef_vect.ll     |  10 -
 .../SLPVectorizer/X86/used-reduced-op.ll           | 529 +++++++++++++++++++++
 .../SLPVectorizer/X86/vectorize-reorder-reuse.ll   |  42 --
 23 files changed, 918 insertions(+), 1335 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll

diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index ac6afb7..32ccc8a 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -24,7 +24,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/IR/ValueHandle.h"
 
 namespace llvm {
 
@@ -60,8 +59,8 @@ extern cl::opt<bool> RunSLPVectorization;
 struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
   using StoreList = SmallVector<StoreInst *, 8>;
   using StoreListMap = MapVector<Value *, StoreList>;
-  using WeakTrackingVHList = SmallVector<WeakTrackingVH, 8>;
-  using WeakTrackingVHListMap = MapVector<Value *, WeakTrackingVHList>;
+  using GEPList = SmallVector<GetElementPtrInst *, 8>;
+  using GEPListMap = MapVector<Value *, GEPList>;
 
   ScalarEvolution *SE = nullptr;
   TargetTransformInfo *TTI = nullptr;
@@ -131,7 +130,7 @@ private:
 
   /// Tries to vectorize constructs started from CmpInst, InsertValueInst or
   /// InsertElementInst instructions.
-  bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
+  bool vectorizeSimpleInstructions(SmallVectorImpl<Instruction *> &Instructions,
                                    BasicBlock *BB, slpvectorizer::BoUpSLP &R);
 
   /// Scan the basic block and look for patterns that are likely to start
@@ -147,7 +146,7 @@ private:
   StoreListMap Stores;
 
   /// The getelementptr instructions in a basic block organized by base pointer.
-  WeakTrackingVHListMap GEPs;
+  GEPListMap GEPs;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 12c114f..6be03d1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1121,6 +1121,14 @@ public:
 #endif
   };
 
+  /// Checks if the instruction is marked for deletion.
+  bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }
+
+  /// Marks the values for later deletion, replacing their uses with Undefs.
+  void eraseInstructions(ArrayRef<Value *> AV);
+
+  ~BoUpSLP();
+
 private:
   /// Checks if all users of \p I are part of the vectorization tree.
   bool areAllUsersVectorized(Instruction *I) const;
@@ -1490,15 +1498,14 @@ private:
   /// This is required to ensure that there are no incorrect collisions in the
   /// AliasCache, which can happen if a new instruction is allocated at the
   /// same address as a previously deleted instruction.
-  void eraseInstruction(Instruction *I) {
-    I->removeFromParent();
-    I->dropAllReferences();
-    DeletedInstructions.emplace_back(I);
+  void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) {
+    auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first;
+    It->getSecond() = It->getSecond() && ReplaceOpsWithUndef;
   }
 
   /// Temporary store for deleted instructions. Instructions will be deleted
   /// eventually when the BoUpSLP is destructed.
-  SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions;
+  DenseMap<Instruction *, bool> DeletedInstructions;
 
   /// A list of values that need to be extracted out of the tree.
   /// This list holds pairs of (Internal Scalar : External User). External User
@@ -2055,6 +2062,30 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
 
 } // end namespace llvm
 
+BoUpSLP::~BoUpSLP() {
+  for (const auto &Pair : DeletedInstructions) {
+    // Replace operands of ignored instructions with Undefs in case they were
+    // marked for deletion.
+    if (Pair.getSecond()) {
+      Value *Undef = UndefValue::get(Pair.getFirst()->getType());
+      Pair.getFirst()->replaceAllUsesWith(Undef);
+    }
+    Pair.getFirst()->dropAllReferences();
+  }
+  for (const auto &Pair : DeletedInstructions) {
+    assert(Pair.getFirst()->use_empty() &&
+           "trying to erase instruction with users.");
+    Pair.getFirst()->eraseFromParent();
+  }
+}
+
+void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
+  for (auto *V : AV) {
+    if (auto *I = dyn_cast<Instruction>(V))
+      eraseInstruction(I, /*ReplaceOpsWithUndef=*/true);
+  }
+}
+
 void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
                         ArrayRef<Value *> UserIgnoreLst) {
   ExtraValueToDebugLocsMap ExternallyUsedValues;
@@ -3541,7 +3572,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
   // Generate the 'InsertElement' instruction.
   for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
     Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
-    if (Instruction *Insrt = dyn_cast<InsertElementInst>(Vec)) {
+    if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {
       GatherSeq.insert(Insrt);
       CSEBlocks.insert(Insrt->getParent());
 
@@ -4290,20 +4321,18 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
     for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
       Value *Scalar = Entry->Scalars[Lane];
+#ifndef NDEBUG
       Type *Ty = Scalar->getType();
       if (!Ty->isVoidTy()) {
-#ifndef NDEBUG
         for (User *U : Scalar->users()) {
           LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
 
-          // It is legal to replace users in the ignorelist by undef.
+          // It is legal to delete users in the ignorelist.
           assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
-                 "Replacing out-of-tree value with undef");
+                 "Deleting out-of-tree value");
         }
-#endif
-        Value *Undef = UndefValue::get(Ty);
-        Scalar->replaceAllUsesWith(Undef);
       }
+#endif
       LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
       eraseInstruction(cast<Instruction>(Scalar));
     }
@@ -4319,7 +4348,7 @@ void BoUpSLP::optimizeGatherSequence() {
                     << " gather sequences instructions.\n");
   // LICM InsertElementInst sequences.
   for (Instruction *I : GatherSeq) {
-    if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I))
+    if (isDeleted(I))
       continue;
 
     // Check if this block is inside a loop.
@@ -4373,6 +4402,8 @@ void BoUpSLP::optimizeGatherSequence() {
     // For all instructions in blocks containing gather sequences:
     for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
       Instruction *In = &*it++;
+      if (isDeleted(In))
+        continue;
       if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
         continue;
 
@@ -5255,19 +5286,6 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
   return Changed;
 }
 
-/// Check that the Values in the slice in VL array are still existent in
-/// the WeakTrackingVH array.
-/// Vectorization of part of the VL array may cause later values in the VL array
-/// to become invalid. We track when this has happened in the WeakTrackingVH
-/// array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
-                               ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin,
-                               unsigned SliceSize) {
-  VL = VL.slice(SliceBegin, SliceSize);
-  VH = VH.slice(SliceBegin, SliceSize);
-  return !std::equal(VL.begin(), VL.end(), VH.begin());
-}
-
 bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
                                             unsigned VecRegSize) {
   const unsigned ChainLen = Chain.size();
@@ -5279,20 +5297,20 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
   if (!isPowerOf2_32(Sz) || VF < 2)
     return false;
 
-  // Keep track of values that were deleted by vectorizing in the loop below.
-  const SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end());
-
   bool Changed = false;
   // Look for profitable vectorizable trees at all offsets, starting at zero.
   for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) {
+    ArrayRef<Value *> Operands = Chain.slice(i, VF);
     // Check that a previous iteration of this loop did not delete the Value.
-    if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
+    if (llvm::any_of(Operands, [&R](Value *V) {
+          auto *I = dyn_cast<Instruction>(V);
+          return I && R.isDeleted(I);
+        }))
       continue;
 
     LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
                       << "\n");
-    ArrayRef<Value *> Operands = Chain.slice(i, VF);
 
     R.buildTree(Operands);
     if (R.isTreeTinyAndNotFullyVectorizable())
@@ -5484,9 +5502,6 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   bool CandidateFound = false;
   int MinCost = SLPCostThreshold;
 
-  // Keep track of values that were deleted by vectorizing in the loop below.
-  SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
-
   unsigned NextInst = 0, MaxInst = VL.size();
   for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
     // No actual vectorization should happen if number of parts is the same as
@@ -5506,13 +5521,16 @@
       if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
         break;
 
+      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
       // Check that a previous iteration of this loop did not delete the Value.
-      if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
+      if (llvm::any_of(Ops, [&R](Value *V) {
+            auto *I = dyn_cast<Instruction>(V);
+            return I && R.isDeleted(I);
+          }))
         continue;
 
       LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
                         << "\n");
-      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
 
       R.buildTree(Ops);
       Optional<ArrayRef<unsigned>> Order = R.bestOrder();
@@ -5733,23 +5751,23 @@ class HorizontalReduction {
     case RK_Min:
       Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS)
                                         : Builder.CreateFCmpOLT(LHS, RHS);
-      break;
+      return Builder.CreateSelect(Cmp, LHS, RHS, Name);
     case RK_Max:
       Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS)
                                         : Builder.CreateFCmpOGT(LHS, RHS);
-      break;
+      return Builder.CreateSelect(Cmp, LHS, RHS, Name);
     case RK_UMin:
       assert(Opcode == Instruction::ICmp && "Expected integer types.");
       Cmp = Builder.CreateICmpULT(LHS, RHS);
-      break;
+      return Builder.CreateSelect(Cmp, LHS, RHS, Name);
     case RK_UMax:
      assert(Opcode == Instruction::ICmp && "Expected integer types.");
      Cmp = Builder.CreateICmpUGT(LHS, RHS);
-      break;
+      return Builder.CreateSelect(Cmp, LHS, RHS, Name);
     case RK_None:
-      llvm_unreachable("Unknown reduction operation.");
+      break;
     }
-    return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+    llvm_unreachable("Unknown reduction operation.");
   }
 
 public:
@@ -6429,6 +6447,9 @@ public:
       }
       // Update users.
       ReductionRoot->replaceAllUsesWith(VectorizedTree);
+      // Mark all scalar reduction ops for deletion; they are replaced by the
+      // vector reductions.
+      V.eraseInstructions(IgnoreList);
     }
     return VectorizedTree != nullptr;
   }
@@ -6683,18 +6704,13 @@ static bool tryToVectorizeHorReductionOrInstOperands(
   // horizontal reduction.
   // Interrupt the process if the Root instruction itself was vectorized or all
   // sub-trees not higher than RecursionMaxDepth were analyzed/vectorized.
-  SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0});
+  SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
   SmallPtrSet<Value *, 8> VisitedInstrs;
   bool Res = false;
   while (!Stack.empty()) {
-    Value *V;
+    Instruction *Inst;
     unsigned Level;
-    std::tie(V, Level) = Stack.pop_back_val();
-    if (!V)
-      continue;
-    auto *Inst = dyn_cast<Instruction>(V);
-    if (!Inst)
-      continue;
+    std::tie(Inst, Level) = Stack.pop_back_val();
     auto *BI = dyn_cast<BinaryOperator>(Inst);
     auto *SI = dyn_cast<StoreInst>(Inst);
     if (BI || SI) {
@@ -6735,8 +6751,8 @@ static bool tryToVectorizeHorReductionOrInstOperands(
     for (auto *Op : Inst->operand_values())
       if (VisitedInstrs.insert(Op).second)
         if (auto *I = dyn_cast<Instruction>(Op))
-          if (!isa<PHINode>(I) && I->getParent() == BB)
-            Stack.emplace_back(Op, Level);
+          if (!isa<PHINode>(I) && !R.isDeleted(I) && I->getParent() == BB)
+            Stack.emplace_back(I, Level);
   }
   return Res;
 }
@@ -6805,11 +6821,10 @@ bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
 }
 
 bool SLPVectorizerPass::vectorizeSimpleInstructions(
-    SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) {
+    SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R) {
   bool OpsChanged = false;
-  for (auto &VH : reverse(Instructions)) {
-    auto *I = dyn_cast_or_null<Instruction>(VH);
-    if (!I)
+  for (auto *I : reverse(Instructions)) {
+    if (R.isDeleted(I))
       continue;
     if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
       OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
@@ -6838,7 +6853,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       if (!P)
         break;
 
-      if (!VisitedInstrs.count(P))
+      if (!VisitedInstrs.count(P) && !R.isDeleted(P))
         Incoming.push_back(P);
     }
 
@@ -6882,9 +6897,12 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
 
   VisitedInstrs.clear();
 
-  SmallVector<WeakVH, 8> PostProcessInstructions;
+  SmallVector<Instruction *, 8> PostProcessInstructions;
   SmallDenseSet<Instruction *, 4> KeyNodes;
   for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    // Skip instructions marked for deletion.
+    if (R.isDeleted(&*it))
+      continue;
     // We may go through BB multiple times so skip the one we have checked.
     if (!VisitedInstrs.insert(&*it).second) {
      if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
@@ -6977,10 +6995,10 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
     SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
 
     // Some of the candidates may have already been vectorized after we
-    // initially collected them. If so, the WeakTrackingVHs will have
-    // nullified the
-    // values, so remove them from the set of candidates.
-    Candidates.remove(nullptr);
+    // initially collected them. If so, they are marked as deleted, so remove
+    // them from the set of candidates.
+    Candidates.remove_if(
+        [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });
 
     // Remove from the set of candidates all pairs of getelementptrs with
     // constant differences. Such getelementptrs are likely not good
     // candidates for vectorization in a bottom-up phase since one can be
     // computed from the other. We also ensure all candidate getelementptr
     // indices are unique.
for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) { - auto *GEPI = cast(GEPList[I]); + auto *GEPI = GEPList[I]; if (!Candidates.count(GEPI)) continue; auto *SCEVI = SE->getSCEV(GEPList[I]); for (int J = I + 1; J < E && Candidates.size() > 1; ++J) { - auto *GEPJ = cast(GEPList[J]); + auto *GEPJ = GEPList[J]; auto *SCEVJ = SE->getSCEV(GEPList[J]); if (isa(SE->getMinusSCEV(SCEVI, SCEVJ))) { - Candidates.remove(GEPList[I]); - Candidates.remove(GEPList[J]); + Candidates.remove(GEPI); + Candidates.remove(GEPJ); } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) { - Candidates.remove(GEPList[J]); + Candidates.remove(GEPJ); } } } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll index 5e6db8b..5f8cbd5 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -17,16 +17,8 @@ define void @PR28330(i32 %n) { ; DEFAULT: for.body: ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> -; DEFAULT-NEXT: [[P20:%.*]] = add i32 [[P17]], undef -; DEFAULT-NEXT: [[P22:%.*]] = add i32 [[P20]], undef -; DEFAULT-NEXT: [[P24:%.*]] = add i32 [[P22]], undef -; DEFAULT-NEXT: [[P26:%.*]] = add i32 [[P24]], undef -; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef -; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef -; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]] -; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR28330( @@ -36,37 +28,30 @@ define void @PR28330(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 -; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0 -; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 -; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1 -; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 -; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2 -; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 -; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3 -; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 -; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4 -; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 -; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5 -; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 -; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6 -; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 -; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7 +; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 +; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 +; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0 +; GATHER-NEXT: 
[[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 +; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1 +; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 +; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2 +; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 +; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3 +; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 +; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4 +; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 +; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5 +; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 +; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6 +; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7 ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> , <8 x i32> ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0 -; GATHER-NEXT: [[P20:%.*]] = add i32 [[P17]], [[TMP19]] ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1 -; GATHER-NEXT: [[P22:%.*]] = add i32 [[P20]], [[TMP20]] ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2 -; GATHER-NEXT: [[P24:%.*]] = add i32 [[P22]], [[TMP21]] ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3 -; GATHER-NEXT: [[P26:%.*]] = add i32 [[P24]], [[TMP22]] ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4 -; GATHER-NEXT: [[P28:%.*]] = add i32 [[P26]], [[TMP23]] ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5 -; GATHER-NEXT: [[P30:%.*]] = add i32 [[P28]], [[TMP24]] ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6 -; GATHER-NEXT: [[P32:%.*]] = add i32 [[P30]], [[TMP25]] ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1 ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2 @@ -78,7 +63,6 @@ define void @PR28330(i32 %n) { ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7 ; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]]) ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]] -; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR28330( @@ -169,16 +153,8 @@ define void @PR32038(i32 %n) { ; DEFAULT: for.body: ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> -; DEFAULT-NEXT: [[P20:%.*]] = add i32 -5, undef -; DEFAULT-NEXT: [[P22:%.*]] = add i32 [[P20]], undef -; DEFAULT-NEXT: [[P24:%.*]] = add i32 [[P22]], undef -; DEFAULT-NEXT: [[P26:%.*]] = add i32 [[P24]], undef -; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef -; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef -; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5 -; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; 
GATHER-LABEL: @PR32038( @@ -188,37 +164,30 @@ define void @PR32038(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 -; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0 -; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 -; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1 -; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 -; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2 -; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 -; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3 -; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 -; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4 -; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 -; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5 -; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 -; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6 -; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 -; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7 +; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 +; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 +; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0 +; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 +; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1 +; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 +; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2 +; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 +; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3 +; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 +; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4 +; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 +; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5 +; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 +; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6 +; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7 ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> , <8 x i32> ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0 -; GATHER-NEXT: [[P20:%.*]] = add i32 -5, [[TMP19]] ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1 -; GATHER-NEXT: [[P22:%.*]] = add i32 [[P20]], [[TMP20]] ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2 -; GATHER-NEXT: [[P24:%.*]] = add i32 [[P22]], [[TMP21]] ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3 -; GATHER-NEXT: [[P26:%.*]] = add i32 [[P24]], [[TMP22]] ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4 -; GATHER-NEXT: [[P28:%.*]] = add i32 [[P26]], [[TMP23]] ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5 -; GATHER-NEXT: [[P30:%.*]] = add 
i32 [[P28]], [[TMP24]] ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6 -; GATHER-NEXT: [[P32:%.*]] = add i32 [[P30]], [[TMP25]] ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1 ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2 @@ -230,7 +199,6 @@ define void @PR32038(i32 %n) { ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7 ; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]]) ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], -5 -; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR32038( @@ -259,18 +227,12 @@ define void @PR32038(i32 %n) { ; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[P5]], i32 2 ; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[P7]], i32 3 ; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> , <4 x i32> -; MAX-COST-NEXT: [[P20:%.*]] = add i32 -5, undef -; MAX-COST-NEXT: [[P22:%.*]] = add i32 [[P20]], undef -; MAX-COST-NEXT: [[P24:%.*]] = add i32 [[P22]], undef -; MAX-COST-NEXT: [[P26:%.*]] = add i32 [[P24]], undef ; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[P28:%.*]] = add i32 [[P26]], [[P27]] ; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) ; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]] ; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]] ; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5 -; MAX-COST-NEXT: [[P30:%.*]] = add i32 [[P28]], [[P29]] ; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]] ; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll index 7e4b95d..b3ce90d 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll @@ -46,12 +46,8 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]] -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_026]] -; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD]], undef -; CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[ADD11]], undef ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]] -; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD19]], undef ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR29]] = getelementptr inbounds i32, i32* [[P2_024]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_025]], 1 @@ -173,12 +169,8 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* 
[[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_020]] -; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], undef -; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD5]], undef ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]] -; CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD9]], undef ; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]] ; CHECK-NEXT: br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: if.end: @@ -293,16 +285,8 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]] -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_047]] -; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD]], undef -; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD16]], undef -; CHECK-NEXT: [[ADD38:%.*]] = add nsw i32 [[ADD27]], undef -; CHECK-NEXT: [[ADD49:%.*]] = add nsw i32 [[ADD38]], undef -; CHECK-NEXT: [[ADD60:%.*]] = add nsw i32 [[ADD49]], undef -; CHECK-NEXT: [[ADD71:%.*]] = add nsw i32 [[ADD60]], undef ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP9]]) ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]] -; CHECK-NEXT: [[ADD82:%.*]] = add nsw i32 [[ADD71]], undef ; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]] ; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: if.end.86: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll index 98a9fd4..90771e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll @@ -13,11 +13,11 @@ define void @patatino(i64 %n, i64 %i, %struct.S* %p) !dbg !7 { ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[I:%.*]], metadata !19, metadata !DIExpression()), !dbg !24 ; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata !20, metadata !DIExpression()), !dbg !25 ; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg !26 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata !21, metadata !DIExpression()), !dbg !27 +; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !21, metadata !DIExpression()), !dbg !27 ; CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg !28 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg !26 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg !26, !tbaa !29 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata !22, metadata !DIExpression()), !dbg !33 +; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !22, metadata !DIExpression()), !dbg !33 ; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg !34 ; CHECK-NEXT: [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg !35 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg !36 diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll new file mode 100644 index 0000000..2fe3424 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll @@ -0,0 +1,153 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" + +@shift = common local_unnamed_addr global [10 x i32] zeroinitializer, align 4 +@data = common local_unnamed_addr global [10 x i8*] zeroinitializer, align 4 + +define void @flat(i32 %intensity) { +; CHECK-LABEL: @flat( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 1, [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 [[SHR]] +; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 1, [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 [[SHR1]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP4]] to i32 +; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[CONV]], -128 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[SUB4:%.*]] = add nsw i32 [[CONV3]], -128 +; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[SUB]], -1 +; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i32 128, [[CONV]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP5]], i32 [[SUB]], i32 [[SUB7]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[SUB4]], -1 +; CHECK-NEXT: [[SUB12:%.*]] = sub nsw i32 128, [[CONV3]] +; CHECK-NEXT: [[COND14:%.*]] = select i1 [[CMP8]], i32 [[SUB4]], i32 [[SUB12]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[COND14]], [[COND]] +; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[IDX_NEG]] +; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[ADD_PTR]], align 1 +; CHECK-NEXT: [[CONV15:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]] +; CHECK-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 +; CHECK-NEXT: store i8 [[CONV17]], i8* [[ADD_PTR]], align 1 +; CHECK-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[ADD]] +; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[ADD_PTR18]], align 1 +; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP7]], 0 +; CHECK-NEXT: [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8 +; CHECK-NEXT: store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1 +; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], 
align 1 +; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP8]] to i32 +; CHECK-NEXT: [[SUB_1:%.*]] = add nsw i32 [[CONV_1]], -128 +; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CONV3_1:%.*]] = zext i8 [[TMP9]] to i32 +; CHECK-NEXT: [[SUB4_1:%.*]] = add nsw i32 [[CONV3_1]], -128 +; CHECK-NEXT: [[CMP5_1:%.*]] = icmp sgt i32 [[SUB_1]], -1 +; CHECK-NEXT: [[SUB7_1:%.*]] = sub nsw i32 128, [[CONV_1]] +; CHECK-NEXT: [[COND_1:%.*]] = select i1 [[CMP5_1]], i32 [[SUB_1]], i32 [[SUB7_1]] +; CHECK-NEXT: [[CMP8_1:%.*]] = icmp sgt i32 [[SUB4_1]], -1 +; CHECK-NEXT: [[SUB12_1:%.*]] = sub nsw i32 128, [[CONV3_1]] +; CHECK-NEXT: [[COND14_1:%.*]] = select i1 [[CMP8_1]], i32 [[SUB4_1]], i32 [[SUB12_1]] +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[COND14_1]], [[COND_1]] +; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]] +; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[IDX_NEG_1]] +; CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[CONV15_1:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]] +; CHECK-NEXT: [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8 +; CHECK-NEXT: store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[ADD_1]] +; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1 +; CHECK-NEXT: [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP11]], 0 +; CHECK-NEXT: [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8 +; CHECK-NEXT: store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1 +; CHECK-NEXT: [[ADD_PTR23_1]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[TMP1]] +; CHECK-NEXT: [[INC_1]] = add nsw i32 [[Y_045]], 2 +; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4 + %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4 + %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4 + %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4 + %shr = lshr i32 1, %0 + %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr + %shr1 = lshr i32 1, %1 + %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ] + %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ] + %4 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %4 to i32 + %sub = add nsw i32 %conv, -128 + %5 = load i8, i8* %arrayidx2, align 1 + %conv3 = zext i8 %5 to i32 + %sub4 = add nsw i32 %conv3, -128 + %cmp5 = icmp sgt i32 %sub, -1 + %sub7 = sub nsw i32 128, %conv + %cond = select i1 %cmp5, i32 %sub, i32 %sub7 + %cmp8 = icmp sgt i32 %sub4, -1 + %sub12 = sub nsw i32 128, %conv3 + %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12 + %add = add nsw i32 %cond14, %cond + %idx.neg = sub nsw i32 0, %add + %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg + %6 = load i8, i8* %add.ptr, align 1 + %conv15 = zext i8 %6 to i32 + %add16 = add nsw i32 %conv15, %intensity + %conv17 = trunc i32 %add16 to i8 + store i8 %conv17, i8* %add.ptr, align 1 + %add.ptr18 = getelementptr inbounds i8, 
i8* %d1_data.046, i32 %add + %7 = load i8, i8* %add.ptr18, align 1 + %not.tobool = icmp eq i8 %7, 0 + %conv21 = zext i1 %not.tobool to i8 + store i8 %conv21, i8* %add.ptr18, align 1 + %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1 + %8 = load i8, i8* %arrayidx, align 1 + %conv.1 = zext i8 %8 to i32 + %sub.1 = add nsw i32 %conv.1, -128 + %9 = load i8, i8* %arrayidx2, align 1 + %conv3.1 = zext i8 %9 to i32 + %sub4.1 = add nsw i32 %conv3.1, -128 + %cmp5.1 = icmp sgt i32 %sub.1, -1 + %sub7.1 = sub nsw i32 128, %conv.1 + %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1 + %cmp8.1 = icmp sgt i32 %sub4.1, -1 + %sub12.1 = sub nsw i32 128, %conv3.1 + %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1 + %add.1 = add nsw i32 %cond14.1, %cond.1 + %idx.neg.1 = sub nsw i32 0, %add.1 + %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1 + %10 = load i8, i8* %add.ptr.1, align 1 + %conv15.1 = zext i8 %10 to i32 + %add16.1 = add nsw i32 %conv15.1, %intensity + %conv17.1 = trunc i32 %add16.1 to i8 + store i8 %conv17.1, i8* %add.ptr.1, align 1 + %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1 + %11 = load i8, i8* %add.ptr18.1, align 1 + %not.tobool.1 = icmp eq i8 %11, 0 + %conv21.1 = zext i1 %not.tobool.1 to i8 + store i8 %conv21.1, i8* %add.ptr18.1, align 1 + %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1 + %inc.1 = add nsw i32 %y.045, 2 + %exitcond.1 = icmp eq i32 %inc.1, 128 + br i1 %exitcond.1, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll index 625748f6..e033ed7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll @@ -18,23 +18,16 @@ define void @mainTest(i32* %ptr) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = add i32 1, undef -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP7]] -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], undef -; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP6]] -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], undef -; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP6]] to i64 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP10]], 1 ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]] ; CHECK-NEXT: [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]] ; CHECK-NEXT: [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]] -; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], undef ; CHECK-NEXT: br label [[LOOP]] ; CHECK: bail_out: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index 
712ff04..4916d28 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -20,10 +20,6 @@ define void @test() #0 { ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32 ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> , [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], -; CHECK-NEXT: [[SUM1:%.*]] = add i64 undef, undef -; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], undef -; CHECK-NEXT: [[ZSUM:%.*]] = add i64 [[SUM2]], 0 -; CHECK-NEXT: [[JOIN:%.*]] = add i64 [[TMP6]], [[ZSUM]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> @@ -31,7 +27,6 @@ define void @test() #0 { ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]] -; CHECK-NEXT: [[LAST:%.*]] = add i64 [[JOIN]], undef ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll index 24f75b3..db02d64 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -11,40 +11,6 @@ define void @Test(i32) { ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef -; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] -; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] -; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] -; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] -; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef -; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] -; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] -; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] -; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef -; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] -; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] -; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] -; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] -; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] -; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef -; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef -; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] -; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] -; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] -; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] -; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] -; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] -; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] -; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] -; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] -; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] -; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] -; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] -; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef -; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] -; CHECK-NEXT: 
[[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] -; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] -; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -52,7 +18,7 @@ define void @Test(i32) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] @@ -79,7 +45,6 @@ define void @Test(i32) { ; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]] -; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 14910, i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 @@ -101,40 +66,8 @@ define void @Test(i32) { ; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> ; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 ; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], -; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef -; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] -; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef -; FORCE_REDUCTION-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef -; FORCE_REDUCTION-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef ; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496 -; FORCE_REDUCTION-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]] -; FORCE_REDUCTION-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], 
[[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555 -; FORCE_REDUCTION-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]] -; FORCE_REDUCTION-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> ; FORCE_REDUCTION-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]] ; FORCE_REDUCTION-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -142,7 +75,7 @@ define void @Test(i32) { ; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]] ; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0:%.*]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]] @@ -170,7 +103,6 @@ define void @Test(i32) { ; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]] -; FORCE_REDUCTION-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529 ; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]] ; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll index 2a0b66e..4622c77 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -13,21 +13,6 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15 ; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[V14:%.*]] = and i32 [[TMP2]], undef -; CHECK-NEXT: [[V16:%.*]] = and i32 undef, [[V14]] -; CHECK-NEXT: [[V18:%.*]] = and i32 undef, [[V16]] -; CHECK-NEXT: [[V20:%.*]] = and i32 undef, [[V18]] -; CHECK-NEXT: [[V22:%.*]] = and i32 undef, [[V20]] -; CHECK-NEXT: [[V24:%.*]] = and i32 undef, [[V22]] -; CHECK-NEXT: [[V26:%.*]] = and i32 undef, [[V24]] -; CHECK-NEXT: [[V28:%.*]] = and i32 undef, [[V26]] -; CHECK-NEXT: [[V30:%.*]] = and i32 undef, [[V28]] -; CHECK-NEXT: [[V32:%.*]] = and i32 undef, [[V30]] -; CHECK-NEXT: [[V34:%.*]] = and i32 undef, [[V32]] -; CHECK-NEXT: [[V36:%.*]] = and i32 undef, [[V34]] -; CHECK-NEXT: [[V38:%.*]] = and i32 undef, [[V36]] -; CHECK-NEXT: [[V40:%.*]] = and i32 undef, [[V38]] -; CHECK-NEXT: [[V42:%.*]] = and i32 undef, [[V40]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] 
= shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> @@ -38,7 +23,6 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]] -; CHECK-NEXT: [[V43:%.*]] = and i32 undef, [[V42]] ; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0 ; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll index 8637df4..e3452e1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -30,12 +30,6 @@ define i64 @load_bswap(%v8i8* %p) { ; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24 ; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16 ; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8 -; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef -; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef -; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef -; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]] -; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]] -; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> @@ -45,7 +39,6 @@ define i64 @load_bswap(%v8i8* %p) { ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]] ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]] ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]] -; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]] ; CHECK-NEXT: ret i64 [[OP_EXTRA]] ; %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 @@ -108,12 +101,6 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], -; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef -; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef -; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef -; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], undef -; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], undef -; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> @@ -121,7 +108,6 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], undef ; CHECK-NEXT: ret i64 [[TMP5]] ; %g0 = 
getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 @@ -196,12 +182,6 @@ define i64 @load64le(i8* %arg) { ; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40 ; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48 ; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56 -; CHECK-NEXT: [[O1:%.*]] = or i64 undef, [[Z0]] -; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef -; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef -; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef -; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]] -; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> @@ -211,7 +191,6 @@ define i64 @load64le(i8* %arg) { ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]] ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]] ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]] -; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]] ; CHECK-NEXT: ret i64 [[OP_EXTRA]] ; %g1 = getelementptr inbounds i8, i8* %arg, i64 1 @@ -272,12 +251,6 @@ define i64 @load64le_nop_shift(i8* %arg) { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], -; CHECK-NEXT: [[O1:%.*]] = or i64 undef, undef -; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef -; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef -; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef -; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], undef -; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> @@ -285,7 +258,6 @@ define i64 @load64le_nop_shift(i8* %arg) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], undef ; CHECK-NEXT: ret i64 [[TMP5]] ; %g1 = getelementptr inbounds i8, i8* %arg, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index 6c3994b..1a2c66c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -100,16 +100,8 @@ define float @bazz() { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] ; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 ; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float -; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]] -; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]] 
-; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]] -; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -119,7 +111,6 @@ define float @bazz() { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] -; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]] ; CHECK-NEXT: store float [[OP_EXTRA5]], float* @res, align 4 ; CHECK-NEXT: ret float [[OP_EXTRA5]] ; @@ -131,16 +122,8 @@ define float @bazz() { ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] ; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 ; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float -; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]] -; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]] -; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]] -; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -150,7 +133,6 @@ define float @bazz() { ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] -; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]] ; THRESHOLD-NEXT: store float [[OP_EXTRA5]], float* @res, align 4 ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]] ; @@ -205,17 +187,14 @@ define float @bazzz() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast 
float undef, [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; CHECK-NEXT: store float [[TMP8]], float* @res, align 4 -; CHECK-NEXT: ret float [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; CHECK-NEXT: store float [[TMP5]], float* @res, align 4 +; CHECK-NEXT: ret float [[TMP5]] ; ; THRESHOLD-LABEL: @bazzz( ; THRESHOLD-NEXT: entry: @@ -224,17 +203,14 @@ define float @bazzz() { ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; THRESHOLD-NEXT: store float [[TMP8]], float* @res, align 4 -; THRESHOLD-NEXT: ret float [[TMP8]] +; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; THRESHOLD-NEXT: store float [[TMP5]], float* @res, align 4 +; THRESHOLD-NEXT: ret float [[TMP5]] ; entry: %0 = load i32, i32* @n, align 4 @@ -267,16 +243,13 @@ define i32 @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 ; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4 ; CHECK-NEXT: ret i32 [[CONV4]] ; @@ -287,16 +260,13 @@ define i32 @foo() { ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x 
float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32 +; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 ; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4 ; THRESHOLD-NEXT: ret i32 [[CONV4]] ; @@ -330,11 +300,6 @@ define float @bar() { ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef -; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef -; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef -; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef -; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]] @@ -342,7 +307,6 @@ define float @bar() { ; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 -; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef ; CHECK-NEXT: store float [[TMP3]], float* @res, align 4 ; CHECK-NEXT: ret float [[TMP3]] ; @@ -351,11 +315,6 @@ define float @bar() { ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] -; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef -; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef -; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef -; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef -; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float 
[[MAX_0_MUL3_1]], undef ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]] @@ -363,7 +322,6 @@ define float @bar() { ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]] ; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 -; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef ; THRESHOLD-NEXT: store float [[TMP3]], float* @res, align 4 ; THRESHOLD-NEXT: ret float [[TMP3]] ; @@ -410,21 +368,6 @@ define float @f(float* nocapture readonly %x) { ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 @@ -459,37 +402,6 @@ define float @f(float* nocapture readonly %x) { ; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float 
undef, [[ADD_26]] -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] -; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] -; CHECK-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]] -; CHECK-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]] -; CHECK-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]] -; CHECK-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]] -; CHECK-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]] -; CHECK-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]] -; CHECK-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]] -; CHECK-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]] -; CHECK-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]] -; CHECK-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]] -; CHECK-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]] -; CHECK-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]] -; CHECK-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]] -; CHECK-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]] -; CHECK-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -511,7 +423,6 @@ define float @f(float* nocapture readonly %x) { ; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] ; CHECK-NEXT: ret float [[OP_RDX]] ; ; THRESHOLD-LABEL: @f( @@ -533,21 +444,6 @@ define float @f(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = 
getelementptr inbounds float, float* [[X]], i64 18 @@ -582,37 +478,6 @@ define float @f(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] -; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] -; THRESHOLD-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]] -; THRESHOLD-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]] -; THRESHOLD-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]] -; THRESHOLD-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]] -; THRESHOLD-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]] -; THRESHOLD-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]] -; THRESHOLD-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]] -; THRESHOLD-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]] -; THRESHOLD-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]] -; THRESHOLD-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]] -; THRESHOLD-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]] -; THRESHOLD-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]] -; THRESHOLD-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]] -; THRESHOLD-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]] -; THRESHOLD-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -634,7 +499,6 @@ define float @f(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] -; THRESHOLD-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] ; THRESHOLD-NEXT: ret float [[OP_RDX]] ; entry: @@ -821,37 +685,6 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x 
float>, <32 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -864,7 +697,6 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] -; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] ; CHECK-NEXT: ret float [[OP_EXTRA]] ; ; THRESHOLD-LABEL: @f1( @@ -904,37 +736,6 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; THRESHOLD-NEXT: [[ADD_6:%.*]] 
= fadd fast float undef, [[ADD_5]] -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -947,7 +748,6 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] -; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA]] ; entry: @@ -1058,17 +858,12 @@ define float @loadadd31(float* nocapture readonly %x) { ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float 
undef, [[ADD_4]] ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 @@ -1079,14 +874,6 @@ define float @loadadd31(float* nocapture readonly %x) { ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 @@ -1105,21 +892,6 @@ define float @loadadd31(float* nocapture readonly %x) { ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> @@ -1145,7 +917,6 @@ define float @loadadd31(float* nocapture readonly %x) { ; CHECK-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] ; CHECK-NEXT: ret float [[TMP12]] ; ; THRESHOLD-LABEL: @loadadd31( @@ -1154,17 +925,12 @@ define float @loadadd31(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; THRESHOLD-NEXT: 
[[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 @@ -1175,14 +941,6 @@ define float @loadadd31(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 @@ -1201,21 +959,6 @@ define float @loadadd31(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; THRESHOLD-NEXT: 
[[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> @@ -1241,7 +984,6 @@ define float @loadadd31(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] ; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] ; THRESHOLD-NEXT: ret float [[TMP12]] ; entry: @@ -1352,14 +1094,6 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]] -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1369,7 +1103,6 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[OP_EXTRA5]] ; ; THRESHOLD-LABEL: @extra_args( @@ -1386,14 +1119,6 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: 
[[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1403,7 +1128,6 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]] ; entry: @@ -1452,16 +1176,6 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; CHECK-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00 -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]] -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; CHECK-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00 -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]] -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1473,7 +1187,6 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00 ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]] -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[OP_EXTRA7]] ; ; THRESHOLD-LABEL: @extra_args_same_several_times( @@ -1490,16 +1203,6 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; THRESHOLD-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00 -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; THRESHOLD-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00 -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]] -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: 
[[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1511,7 +1214,6 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 ; THRESHOLD-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00 ; THRESHOLD-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA7]] ; entry: @@ -1564,14 +1266,6 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]] -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]] -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]] -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1581,7 +1275,6 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[OP_EXTRA5]] ; ; THRESHOLD-LABEL: @extra_args_no_replace( @@ -1600,14 +1293,6 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]] -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]] -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1617,7 +1302,6 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[TMP2:%.*]] = 
extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]] ; entry: @@ -1668,10 +1352,6 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef -; CHECK-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef -; CHECK-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef -; CHECK-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -1679,7 +1359,6 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] -; CHECK-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]] ; CHECK-NEXT: ret i32 [[OP_EXTRA3]] ; ; THRESHOLD-LABEL: @wobble( @@ -1696,10 +1375,6 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; THRESHOLD-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef -; THRESHOLD-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef -; THRESHOLD-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef -; THRESHOLD-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -1707,7 +1382,6 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; THRESHOLD-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] ; THRESHOLD-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] -; THRESHOLD-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]] ; THRESHOLD-NEXT: ret i32 [[OP_EXTRA3]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 8432b91..8b0ae66 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -12,19 +12,6 @@ define i32 @maxi8(i32) { ; CHECK-LABEL: @maxi8( ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt 
i32 [[TMP8]], undef -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] @@ -34,9 +21,8 @@ define i32 @maxi8(i32) { ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; CHECK-NEXT: ret i32 [[TMP16]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -66,35 +52,6 @@ define i32 @maxi8(i32) { define i32 @maxi16(i32) { ; CHECK-LABEL: @maxi16( ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef -; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef -; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef -; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef -; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef -; CHECK-NEXT: [[TMP28:%.*]] = 
select i1 [[TMP27]], i32 [[TMP26]], i32 undef -; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef -; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] @@ -107,9 +64,8 @@ define i32 @maxi16(i32) { ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef -; CHECK-NEXT: ret i32 [[TMP32]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -163,67 +119,6 @@ define i32 @maxi16(i32) { define i32 @maxi32(i32) { ; CHECK-LABEL: @maxi32( ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef -; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef -; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef -; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef -; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef -; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef -; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 
[[TMP28]], i32 undef -; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef -; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef -; CHECK-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef -; CHECK-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef -; CHECK-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef -; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef -; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef -; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef -; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef -; CHECK-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef -; CHECK-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef -; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef -; CHECK-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef -; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef -; CHECK-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef -; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef -; CHECK-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef -; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef -; CHECK-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef -; CHECK-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef -; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef -; CHECK-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef -; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef -; CHECK-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -239,9 +134,8 @@ define i32 @maxi32(i32) { ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef -; CHECK-NEXT: ret i32 [[TMP64]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -343,19 +237,6 @@ define i32 @maxi32(i32) 
{ define float @maxf8(float) { ; CHECK-LABEL: @maxf8( ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef -; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef -; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef -; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef -; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef -; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef -; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] @@ -365,9 +246,8 @@ define float @maxf8(float) { ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef -; CHECK-NEXT: ret float [[TMP16]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: ret float [[TMP3]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -397,35 +277,6 @@ define float @maxf8(float) { define float @maxf16(float) { ; CHECK-LABEL: @maxf16( ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef -; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef -; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef -; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef -; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef -; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef -; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef -; CHECK-NEXT: [[TMP16:%.*]] 
= select i1 [[TMP15]], float [[TMP14]], float undef -; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef -; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef -; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef -; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef -; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef -; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef -; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef -; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef -; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] @@ -438,9 +289,8 @@ define float @maxf16(float) { ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef -; CHECK-NEXT: ret float [[TMP32]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; CHECK-NEXT: ret float [[TMP3]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -494,67 +344,6 @@ define float @maxf16(float) { define float @maxf32(float) { ; CHECK-LABEL: @maxf32( ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef -; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef -; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef -; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef -; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef -; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef -; CHECK-NEXT: 
[[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef -; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef -; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef -; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef -; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef -; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef -; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef -; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef -; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef -; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef -; CHECK-NEXT: [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]], float undef -; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]], float undef -; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]], float undef -; CHECK-NEXT: [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]], float undef -; CHECK-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]], float undef -; CHECK-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]], float undef -; CHECK-NEXT: [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef -; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]], float undef -; CHECK-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]], float undef -; CHECK-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef -; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]], float undef -; CHECK-NEXT: [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef -; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]], float undef -; CHECK-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef -; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]], float undef -; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef -; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]], float undef -; CHECK-NEXT: [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]], float undef -; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef -; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]], float undef -; CHECK-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef -; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]], float 
undef -; CHECK-NEXT: [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] @@ -570,9 +359,8 @@ define float @maxf32(float) { ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 -; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]], float undef -; CHECK-NEXT: ret float [[TMP64]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; CHECK-NEXT: ret float [[TMP3]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -678,34 +466,24 @@ define i32 @maxi8_mutiple_uses(i32) { ; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef -; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef -; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; SSE-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SSE-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SSE-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]] -; SSE-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]] -; SSE-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]] -; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]] -; SSE-NEXT: 
[[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; SSE-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]] -; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]] -; SSE-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4 -; SSE-NEXT: store i32 [[TMP25]], i32* @var, align 8 -; SSE-NEXT: ret i32 [[TMP24]] +; SSE-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]] +; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]] +; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]] +; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4 +; SSE-NEXT: store i32 [[TMP15]], i32* @var, align 8 +; SSE-NEXT: ret i32 [[TMP14]] ; ; AVX-LABEL: @maxi8_mutiple_uses( ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -713,34 +491,24 @@ define i32 @maxi8_mutiple_uses(i32) { ; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef -; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef -; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]] -; AVX-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]] -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]] -; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 
[[TMP20]], i32 [[TMP19]], i32 [[TMP5]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; AVX-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]] -; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4 -; AVX-NEXT: store i32 [[TMP25]], i32* @var, align 8 -; AVX-NEXT: ret i32 [[TMP24]] +; AVX-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]] +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]] +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]] +; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]] +; AVX-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]] +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4 +; AVX-NEXT: store i32 [[TMP15]], i32* @var, align 8 +; AVX-NEXT: ret i32 [[TMP14]] ; ; AVX2-LABEL: @maxi8_mutiple_uses( ; AVX2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -748,34 +516,24 @@ define i32 @maxi8_mutiple_uses(i32) { ; AVX2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef -; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef -; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX2-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]] -; AVX2-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]] -; AVX2-NEXT: [[TMP20:%.*]] 
= icmp sgt i32 [[TMP19]], [[TMP5]] -; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; AVX2-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]] -; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4 -; AVX2-NEXT: store i32 [[TMP25]], i32* @var, align 8 -; AVX2-NEXT: ret i32 [[TMP24]] +; AVX2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]] +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]] +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]] +; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]] +; AVX2-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]] +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4 +; AVX2-NEXT: store i32 [[TMP15]], i32* @var, align 8 +; AVX2-NEXT: ret i32 [[TMP14]] ; ; SKX-LABEL: @maxi8_mutiple_uses( ; SKX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16 @@ -797,26 +555,16 @@ define i32 @maxi8_mutiple_uses(i32) { ; SKX-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]] ; SKX-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]] ; SKX-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1 -; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef -; SKX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef -; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef -; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP6]] -; SKX-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 -; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP14]] -; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP14]] -; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP6]] -; SKX-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; SKX-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP27]] -; SKX-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[OP_EXTRA]], i32 [[TMP27]] -; SKX-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 -; SKX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 3, i32 4 -; SKX-NEXT: store i32 [[TMP31]], i32* @var, align 8 -; SKX-NEXT: ret i32 [[TMP29]] +; SKX-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 +; SKX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]] +; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 [[TMP14]] +; SKX-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 
+; SKX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]] +; SKX-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]], i32 [[TMP17]] +; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 +; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4 +; SKX-NEXT: store i32 [[TMP21]], i32* @var, align 8 +; SKX-NEXT: ret i32 [[TMP19]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -854,33 +602,21 @@ define i32 @maxi8_wrong_parent(i32) { ; SSE: pp: ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef -; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef -; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; SSE-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SSE-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SSE-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]] -; SSE-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]] -; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]] -; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]] -; SSE-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]] -; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]] -; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; SSE-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] +; SSE-NEXT: [[TMP12:%.*]] = icmp sgt 
i32 [[TMP11]], [[TMP8]] +; SSE-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]] +; SSE-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]] +; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]] ; SSE-NEXT: ret i32 [[OP_EXTRA]] ; ; AVX-LABEL: @maxi8_wrong_parent( @@ -891,33 +627,21 @@ define i32 @maxi8_wrong_parent(i32) { ; AVX: pp: ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef -; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef -; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; AVX-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]] -; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]] -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]] -; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]] -; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]] -; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; AVX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] +; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] +; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] +; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]] +; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]] +; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]] ; 
AVX-NEXT: ret i32 [[OP_EXTRA]] ; ; AVX2-LABEL: @maxi8_wrong_parent( @@ -928,33 +652,21 @@ define i32 @maxi8_wrong_parent(i32) { ; AVX2: pp: ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef -; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef -; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; AVX2-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX2-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]] -; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]] -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]] -; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]] -; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]] -; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; AVX2-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] +; AVX2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] +; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] +; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]] +; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]] +; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]] ; AVX2-NEXT: ret i32 [[OP_EXTRA]] ; ; SKX-LABEL: @maxi8_wrong_parent( @@ -985,21 +697,9 @@ define i32 @maxi8_wrong_parent(i32) { ; SKX-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 
1 ; SKX-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]] ; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 -; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef -; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef -; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef -; SKX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef -; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef -; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP7]] -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP7]] -; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], [[TMP8]] -; SKX-NEXT: [[TMP32:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 -; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP20]] -; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP20]] -; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 [[TMP8]] +; SKX-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 +; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]] +; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP20]] ; SKX-NEXT: ret i32 [[OP_EXTRA]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index 311d8a4..ed0e578 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -37,14 +37,11 @@ define i32 @add_red(float* %A, i32 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], -; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[ADD16:%.*]] = fadd fast float [[ADD11]], undef ; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] @@ -77,14 +74,11 @@ define i32 @add_red(float* %A, i32 %n) { ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], -; STORE-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef -; STORE-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; 
STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; STORE-NEXT: [[ADD16:%.*]] = fadd fast float [[ADD11]], undef ; STORE-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] @@ -178,14 +172,11 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef ; CHECK-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] @@ -223,14 +214,11 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; STORE-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] -; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef -; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef ; STORE-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] @@ -350,13 +338,6 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]] -; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef -; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef -; CHECK-NEXT: [[ADD26:%.*]] = fadd fast float [[ADD20]], undef -; CHECK-NEXT: [[ADD32:%.*]] = fadd fast float [[ADD26]], undef -; CHECK-NEXT: [[ADD38:%.*]] = fadd fast float [[ADD32]], undef -; CHECK-NEXT: [[ADD44:%.*]] = fadd fast float [[ADD38]], undef ; CHECK-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8 ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* 
[[ARRAYIDX48]], align 4 @@ -369,7 +350,6 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]] -; CHECK-NEXT: [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]] ; CHECK-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]] @@ -421,13 +401,6 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>* ; STORE-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 ; STORE-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]] -; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef -; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef -; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef -; STORE-NEXT: [[ADD26:%.*]] = fadd fast float [[ADD20]], undef -; STORE-NEXT: [[ADD32:%.*]] = fadd fast float [[ADD26]], undef -; STORE-NEXT: [[ADD38:%.*]] = fadd fast float [[ADD32]], undef -; STORE-NEXT: [[ADD44:%.*]] = fadd fast float [[ADD38]], undef ; STORE-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8 ; STORE-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] ; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 @@ -440,7 +413,6 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]] ; STORE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; STORE-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]] -; STORE-NEXT: [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]] ; STORE-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]] @@ -576,16 +548,12 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_042]], undef -; CHECK-NEXT: [[ADD9:%.*]] = fadd fast float [[ADD]], undef -; CHECK-NEXT: [[ADD15:%.*]] = fadd fast float [[ADD9]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]] -; CHECK-NEXT: [[ADD21:%.*]] = fadd fast float [[ADD15]], undef ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] @@ -622,16 +590,12 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; 
STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] -; STORE-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_042]], undef -; STORE-NEXT: [[ADD9:%.*]] = fadd fast float [[ADD]], undef -; STORE-NEXT: [[ADD15:%.*]] = fadd fast float [[ADD9]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 ; STORE-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]] -; STORE-NEXT: [[ADD21:%.*]] = fadd fast float [[ADD15]], undef ; STORE-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] @@ -1087,14 +1051,11 @@ define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]] -; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef -; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef ; STORE-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4 ; STORE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1 ; STORE-NEXT: [[INC]] = add nsw i64 [[I_039]], 1 @@ -1169,14 +1130,11 @@ define void @float_red_example4(float* %res) { ; STORE-LABEL: @float_red_example4( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16 -; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef -; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1216,12 +1174,6 @@ define void @float_red_example8(float* %res) { ; STORE-LABEL: @float_red_example8( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16 -; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef -; STORE-NEXT: [[ADD_1:%.*]] 
= fadd fast float undef, [[ADD]]
-; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
-; STORE-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
-; STORE-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
-; STORE-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> undef, <8 x i32>
 ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP0]], [[RDX_SHUF]]
 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
@@ -1229,7 +1181,6 @@ define void @float_red_example8(float* %res) {
 ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32>
 ; STORE-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
-; STORE-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
 ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
 ; STORE-NEXT: ret void
 ;
@@ -1293,20 +1244,6 @@ define void @float_red_example16(float* %res) {
 ; STORE-LABEL: @float_red_example16(
 ; STORE-NEXT: entry:
 ; STORE-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16
-; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef
-; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
-; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
-; STORE-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
-; STORE-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
-; STORE-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
-; STORE-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
-; STORE-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
-; STORE-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
-; STORE-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
-; STORE-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
-; STORE-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
-; STORE-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
-; STORE-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> undef, <16 x i32>
 ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP0]], [[RDX_SHUF]]
 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32>
@@ -1316,7 +1253,6 @@ define void @float_red_example16(float* %res) {
 ; STORE-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32>
 ; STORE-NEXT: [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0
-; STORE-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
 ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
 ; STORE-NEXT: ret void
 ;
@@ -1372,14 +1308,11 @@ define void @i32_red_example4(i32* %res) {
 ; STORE-LABEL: @i32_red_example4(
 ; STORE-NEXT: entry:
 ; STORE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16
-; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
-; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32>
 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <4 x i32> [[TMP0]], [[RDX_SHUF]]
 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32>
 ; STORE-NEXT: [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
-; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
 ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
 ; STORE-NEXT: ret void
 ;
@@ -1419,12 +1352,6 @@ define void @i32_red_example8(i32* %res) {
 ; STORE-LABEL: @i32_red_example8(
 ; STORE-NEXT: entry:
 ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
-; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
-; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
-; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
-; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
-; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
-; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -1432,7 +1359,6 @@ define void @i32_red_example8(i32* %res) {
 ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
 ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
 ; STORE-NEXT: ret void
 ;
@@ -1496,20 +1422,6 @@ define void @i32_red_example16(i32* %res) {
 ; STORE-LABEL: @i32_red_example16(
 ; STORE-NEXT: entry:
 ; STORE-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16
-; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
-; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
-; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
-; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
-; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
-; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
-; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
-; STORE-NEXT: [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
-; STORE-NEXT: [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
-; STORE-NEXT: [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
-; STORE-NEXT: [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
-; STORE-NEXT: [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
-; STORE-NEXT: [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
-; STORE-NEXT: [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> undef, <16 x i32>
 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <16 x i32> [[TMP0]], [[RDX_SHUF]]
 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32>
@@ -1519,7 +1431,6 @@ define void @i32_red_example16(i32* %res) {
 ; STORE-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32>
 ; STORE-NEXT: [[BIN_RDX6:%.*]] = add nsw <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
-; STORE-NEXT: [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
 ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
 ; STORE-NEXT: ret void
 ;
@@ -1631,36 +1542,6 @@ define void @i32_red_example32(i32* %res) {
 ; STORE-LABEL: @i32_red_example32(
 ; STORE-NEXT: entry:
 ; STORE-NEXT: [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16
-; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
-; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
-; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
-; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
-; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
-; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
-; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
-; STORE-NEXT: [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
-; STORE-NEXT: [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
-; STORE-NEXT: [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
-; STORE-NEXT: [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
-; STORE-NEXT: [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
-; STORE-NEXT: [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
-; STORE-NEXT: [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
-; STORE-NEXT: [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
-; STORE-NEXT: [[ADD_15:%.*]] = add nsw i32 undef, [[ADD_14]]
-; STORE-NEXT: [[ADD_16:%.*]] = add nsw i32 undef, [[ADD_15]]
-; STORE-NEXT: [[ADD_17:%.*]] = add nsw i32 undef, [[ADD_16]]
-; STORE-NEXT: [[ADD_18:%.*]] = add nsw i32 undef, [[ADD_17]]
-; STORE-NEXT: [[ADD_19:%.*]] = add nsw i32 undef, [[ADD_18]]
-; STORE-NEXT: [[ADD_20:%.*]] = add nsw i32 undef, [[ADD_19]]
-; STORE-NEXT: [[ADD_21:%.*]] = add nsw i32 undef, [[ADD_20]]
-; STORE-NEXT: [[ADD_22:%.*]] = add nsw i32 undef, [[ADD_21]]
-; STORE-NEXT: [[ADD_23:%.*]] = add nsw i32 undef, [[ADD_22]]
-; STORE-NEXT: [[ADD_24:%.*]] = add nsw i32 undef, [[ADD_23]]
-; STORE-NEXT: [[ADD_25:%.*]] = add nsw i32 undef, [[ADD_24]]
-; STORE-NEXT: [[ADD_26:%.*]] = add nsw i32 undef, [[ADD_25]]
-; STORE-NEXT: [[ADD_27:%.*]] = add nsw i32 undef, [[ADD_26]]
-; STORE-NEXT: [[ADD_28:%.*]] = add nsw i32 undef, [[ADD_27]]
-; STORE-NEXT: [[ADD_29:%.*]] = add nsw i32 undef, [[ADD_28]]
 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> undef, <32 x i32>
 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <32 x i32> [[TMP0]], [[RDX_SHUF]]
 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[BIN_RDX]], <32 x i32> undef, <32 x i32>
@@ -1672,7 +1553,6 @@ define void @i32_red_example32(i32* %res) {
 ; STORE-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[BIN_RDX6]], <32 x i32> undef, <32 x i32>
 ; STORE-NEXT: [[BIN_RDX8:%.*]] = add nsw <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]]
 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <32 x i32> [[BIN_RDX8]], i32 0
-; STORE-NEXT: [[ADD_30:%.*]] = add nsw i32 undef, [[ADD_29]]
 ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
 ; STORE-NEXT: ret void
 ;
@@ -1750,12 +1630,6 @@ define void @i32_red_call(i32 %val) {
 ; CHECK-LABEL: @i32_red_call(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
-; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
-; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
-; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -1763,19 +1637,12 @@ define void @i32_red_call(i32 %val) {
 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
 ; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
 ; CHECK-NEXT: ret void
 ;
 ; STORE-LABEL: @i32_red_call(
 ; STORE-NEXT: entry:
 ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
-; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
-; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
-; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
-; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
-; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
-; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -1783,7 +1650,6 @@ define void @i32_red_call(i32 %val) {
 ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
 ; STORE-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
 ; STORE-NEXT: ret void
 ;
@@ -1811,12 +1677,6 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_
 ; CHECK-LABEL: @i32_red_invoke(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
-; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
-; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
-; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -1824,7 +1684,6 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_
 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
 ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
 ; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
 ; CHECK: exception:
@@ -1837,12 +1696,6 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_
 ; STORE-LABEL: @i32_red_invoke(
 ; STORE-NEXT: entry:
 ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
-; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
-; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
-; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
-; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
-; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
-; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -1850,7 +1703,6 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_
 ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
 ; STORE-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
 ; STORE-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
 ; STORE: exception:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
index ffbdd9f..52c1c6b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
@@ -12,10 +12,10 @@ define i32 @test(double* nocapture %A, i8* nocapture %B) {
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1
 ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]],
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8> undef, i8 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i8> undef, i8 [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP5]], i8 [[TMP3]], i32 1
 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double>
 ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]]
 ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]],
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
index 13bbe5e..0b8c72b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
@@ -5,36 +5,6 @@ define signext i8 @Foo(<32 x i8>* %__v) {
 ; CHECK-LABEL: @Foo(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]], align 32
-; CHECK-NEXT: [[ADD_I_1_I:%.*]] = add i8 undef, undef
-; CHECK-NEXT: [[ADD_I_2_I:%.*]] = add i8 [[ADD_I_1_I]], undef
-; CHECK-NEXT: [[ADD_I_3_I:%.*]] = add i8 [[ADD_I_2_I]], undef
-; CHECK-NEXT: [[ADD_I_4_I:%.*]] = add i8 [[ADD_I_3_I]], undef
-; CHECK-NEXT: [[ADD_I_5_I:%.*]] = add i8 [[ADD_I_4_I]], undef
-; CHECK-NEXT: [[ADD_I_6_I:%.*]] = add i8 [[ADD_I_5_I]], undef
-; CHECK-NEXT: [[ADD_I_7_I:%.*]] = add i8 [[ADD_I_6_I]], undef
-; CHECK-NEXT: [[ADD_I_8_I:%.*]] = add i8 [[ADD_I_7_I]], undef
-; CHECK-NEXT: [[ADD_I_9_I:%.*]] = add i8 [[ADD_I_8_I]], undef
-; CHECK-NEXT: [[ADD_I_10_I:%.*]] = add i8 [[ADD_I_9_I]], undef
-; CHECK-NEXT: [[ADD_I_11_I:%.*]] = add i8 [[ADD_I_10_I]], undef
-; CHECK-NEXT: [[ADD_I_12_I:%.*]] = add i8 [[ADD_I_11_I]], undef
-; CHECK-NEXT: [[ADD_I_13_I:%.*]] = add i8 [[ADD_I_12_I]], undef
-; CHECK-NEXT: [[ADD_I_14_I:%.*]] = add i8 [[ADD_I_13_I]], undef
-; CHECK-NEXT: [[ADD_I_15_I:%.*]] = add i8 [[ADD_I_14_I]], undef
-; CHECK-NEXT: [[ADD_I_16_I:%.*]] = add i8 [[ADD_I_15_I]], undef
-; CHECK-NEXT: [[ADD_I_17_I:%.*]] = add i8 [[ADD_I_16_I]], undef
-; CHECK-NEXT: [[ADD_I_18_I:%.*]] = add i8 [[ADD_I_17_I]], undef
-; CHECK-NEXT: [[ADD_I_19_I:%.*]] = add i8 [[ADD_I_18_I]], undef
-; CHECK-NEXT: [[ADD_I_20_I:%.*]] = add i8 [[ADD_I_19_I]], undef
-; CHECK-NEXT: [[ADD_I_21_I:%.*]] = add i8 [[ADD_I_20_I]], undef
-; CHECK-NEXT: [[ADD_I_22_I:%.*]] = add i8 [[ADD_I_21_I]], undef
-; CHECK-NEXT: [[ADD_I_23_I:%.*]] = add i8 [[ADD_I_22_I]], undef
-; CHECK-NEXT: [[ADD_I_24_I:%.*]] = add i8 [[ADD_I_23_I]], undef
-; CHECK-NEXT: [[ADD_I_25_I:%.*]] = add i8 [[ADD_I_24_I]], undef
-; CHECK-NEXT: [[ADD_I_26_I:%.*]] = add i8 [[ADD_I_25_I]], undef
-; CHECK-NEXT: [[ADD_I_27_I:%.*]] = add i8 [[ADD_I_26_I]], undef
-; CHECK-NEXT: [[ADD_I_28_I:%.*]] = add i8 [[ADD_I_27_I]], undef
-; CHECK-NEXT: [[ADD_I_29_I:%.*]] = add i8 [[ADD_I_28_I]], undef
-; CHECK-NEXT: [[ADD_I_30_I:%.*]] = add i8 [[ADD_I_29_I]], undef
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i8> [[TMP0]], <32 x i8> undef, <32 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <32 x i8> [[TMP0]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i8> [[BIN_RDX]], <32 x i8> undef, <32 x i32>
@@ -46,7 +16,6 @@ define signext i8 @Foo(<32 x i8>* %__v) {
 ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i8> [[BIN_RDX6]], <32 x i8> undef, <32 x i32>
 ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <32 x i8> [[BIN_RDX6]], [[RDX_SHUF7]]
 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <32 x i8> [[BIN_RDX8]], i32 0
-; CHECK-NEXT: [[ADD_I_31_I:%.*]] = add i8 [[ADD_I_30_I]], undef
 ; CHECK-NEXT: ret i8 [[TMP1]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
index 56539ab..15ede81 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
@@ -35,13 +35,6 @@ define i32 @test(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
 ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]],
-; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
-; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
-; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
-; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
-; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
-; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
-; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -50,7 +43,6 @@ define i32 @test(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
 ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]]
-; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end:
 ; CHECK-NEXT: ret i32 [[OP_EXTRA]]
@@ -138,13 +130,6 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) {
 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
-; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
-; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
-; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
-; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
-; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
-; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
-; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -153,7 +138,6 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) {
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
 ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
-; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end:
 ; CHECK-NEXT: ret i32 [[OP_EXTRA]]
@@ -258,13 +242,6 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) {
 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]], [[TMP3]]
-; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
-; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
-; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
-; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
-; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
-; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
-; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -273,7 +250,6 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) {
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
 ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
-; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end:
 ; CHECK-NEXT: ret i32 [[OP_EXTRA]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
index 4dd4087..7fdc953 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
@@ -26,12 +26,6 @@ define i32 @test_add(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[MUL_18:%.*]] = add i32 undef, undef
-; CHECK-NEXT: [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
-; CHECK-NEXT: [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
-; CHECK-NEXT: [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
-; CHECK-NEXT: [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
-; CHECK-NEXT: [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -39,7 +33,6 @@ define i32 @test_add(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
 ; CHECK-NEXT: ret i32 [[TMP2]]
 ;
 entry:
@@ -147,12 +140,6 @@ define i32 @test_and(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[MUL_18:%.*]] = and i32 undef, undef
-; CHECK-NEXT: [[MUL_29:%.*]] = and i32 undef, [[MUL_18]]
-; CHECK-NEXT: [[MUL_310:%.*]] = and i32 undef, [[MUL_29]]
-; CHECK-NEXT: [[MUL_411:%.*]] = and i32 undef, [[MUL_310]]
-; CHECK-NEXT: [[MUL_512:%.*]] = and i32 undef, [[MUL_411]]
-; CHECK-NEXT: [[MUL_613:%.*]] = and i32 undef, [[MUL_512]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP1]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -160,7 +147,6 @@ define i32 @test_and(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: [[MUL_714:%.*]] = and i32 undef, [[MUL_613]]
 ; CHECK-NEXT: ret i32 [[TMP2]]
 ;
 entry:
@@ -208,12 +194,6 @@ define i32 @test_or(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[MUL_18:%.*]] = or i32 undef, undef
-; CHECK-NEXT: [[MUL_29:%.*]] = or i32 undef, [[MUL_18]]
-; CHECK-NEXT: [[MUL_310:%.*]] = or i32 undef, [[MUL_29]]
-; CHECK-NEXT: [[MUL_411:%.*]] = or i32 undef, [[MUL_310]]
-; CHECK-NEXT: [[MUL_512:%.*]] = or i32 undef, [[MUL_411]]
-; CHECK-NEXT: [[MUL_613:%.*]] = or i32 undef, [[MUL_512]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i32> [[TMP1]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -221,7 +201,6 @@ define i32 @test_or(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: [[MUL_714:%.*]] = or i32 undef, [[MUL_613]]
 ; CHECK-NEXT: ret i32 [[TMP2]]
 ;
 entry:
@@ -269,12 +248,6 @@ define i32 @test_xor(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[MUL_18:%.*]] = xor i32 undef, undef
-; CHECK-NEXT: [[MUL_29:%.*]] = xor i32 undef, [[MUL_18]]
-; CHECK-NEXT: [[MUL_310:%.*]] = xor i32 undef, [[MUL_29]]
-; CHECK-NEXT: [[MUL_411:%.*]] = xor i32 undef, [[MUL_310]]
-; CHECK-NEXT: [[MUL_512:%.*]] = xor i32 undef, [[MUL_411]]
-; CHECK-NEXT: [[MUL_613:%.*]] = xor i32 undef, [[MUL_512]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <8 x i32> [[TMP1]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -282,7 +255,6 @@ define i32 @test_xor(i32* nocapture readonly %p) {
 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = xor <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: [[MUL_714:%.*]] = xor i32 undef, [[MUL_613]]
 ; CHECK-NEXT: ret i32 [[TMP2]]
 ;
 entry:
@@ -322,15 +294,12 @@ define i32 @PR37731(<4 x i32>* noalias nocapture dereferenceable(16) %self) unna
 ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP4]],
 ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]]
 ; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = xor i32 undef, undef
-; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], undef
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <4 x i32> [[TMP6]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32>
 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = xor <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], undef
-; CHECK-NEXT: ret i32 [[TMP9]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; CHECK-NEXT: ret i32 [[TMP7]]
 ;
 entry:
 %0 = load <4 x i32>, <4 x i32>* %self, align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
index 27997f6a..4a69a35 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
@@ -33,11 +33,8 @@ define i32 @foo(i32* %diff) #0 {
 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4
 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP9]]
-; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 undef, [[A_088]]
 ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1
-; CHECK-NEXT: [[ADD24:%.*]] = add nsw i32 [[ADD10]], undef
 ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2
-; CHECK-NEXT: [[ADD38:%.*]] = add nsw i32 [[ADD24]], undef
 ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3
 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16
@@ -47,7 +44,6 @@ define i32 @foo(i32* %diff) #0 {
 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
 ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]]
-; CHECK-NEXT: [[ADD52:%.*]] = add nsw i32 [[ADD38]], undef
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
index 13884ef..768cba9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
@@ -19,11 +19,6 @@ define void @hoge() {
 ; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
 ; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32>
 ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]],
-; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 undef, undef
-; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32 undef
-; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef
-; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef
-; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], undef
 ; CHECK-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32>
 ; CHECK-NEXT: [[RDX_MINMAX_CMP10:%.*]] = icmp sgt <4 x i32> [[TMP5]], [[RDX_SHUF9]]
 ; CHECK-NEXT: [[RDX_MINMAX_SELECT11:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP10]], <4 x i32> [[TMP5]], <4 x i32> [[RDX_SHUF9]]
@@ -31,28 +26,12 @@ define void @hoge() {
 ; CHECK-NEXT: [[RDX_MINMAX_CMP13:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT11]], [[RDX_SHUF12]]
 ; CHECK-NEXT: [[RDX_MINMAX_SELECT14:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP13]], <4 x i32> [[RDX_MINMAX_SELECT11]], <4 x i32> [[RDX_SHUF12]]
 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT14]], i32 0
-; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef
 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], 63
 ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]]
 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32>
 ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]],
-; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 undef, undef
-; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32 undef
-; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef
-; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 undef, i32 [[TMP27]]
-; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 undef, undef
-; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 undef, i32 undef
-; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP29]]
-; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP29]], i32 [[TMP32]]
-; CHECK-NEXT: [[TMP36:%.*]] = icmp sgt i32 undef, undef
-; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 undef, i32 undef
-; CHECK-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP37]], [[TMP34]]
-; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP34]], i32 [[TMP37]]
-; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 undef, undef
-; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 undef, i32 undef
-; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], [[TMP39]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32>
 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP9]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP9]], <4 x i32> [[RDX_SHUF]]
@@ -70,7 +49,6 @@ define void @hoge() {
 ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = select i1 [[TMP14]], i32 [[OP_EXTRA5]], i32 undef
 ; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[OP_EXTRA6]], undef
 ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP15]], i32 [[OP_EXTRA6]], i32 undef
-; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP39]], i32 [[TMP42]]
 ; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA7]]
 ; CHECK-NEXT: unreachable
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
index 7ecd580..822c362 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
@@ -16,15 +16,6 @@ define void @_Z2azv() local_unnamed_addr {
 ; CHECK-NEXT: [[DOTSROA_RAW_IDX_7:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[CMP_I1_4:%.*]] = icmp slt i32 undef, undef
-; CHECK-NEXT: [[DOTSROA_SPECULATED_4:%.*]] = select i1 [[CMP_I1_4]], i32 undef, i32 undef
-; CHECK-NEXT: [[CMP_I1_5:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_4]], undef
-; CHECK-NEXT: [[DOTSROA_SPECULATED_5:%.*]] = select i1 [[CMP_I1_5]], i32 undef, i32 [[DOTSROA_SPECULATED_4]]
-; CHECK-NEXT: [[CMP_I1_6:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_5]], undef
-; CHECK-NEXT: [[DOTSROA_SPECULATED_6:%.*]] = select i1 [[CMP_I1_6]], i32 undef, i32 [[DOTSROA_SPECULATED_5]]
-; CHECK-NEXT: [[CMP_I1_7:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_6]], undef
-; CHECK-NEXT: [[DOTSROA_SPECULATED_7:%.*]] = select i1 [[CMP_I1_7]], i32 undef, i32 [[DOTSROA_SPECULATED_6]]
-; CHECK-NEXT: [[CMP_I1_8:%.*]] = icmp slt i32 undef, undef
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP1]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP1]], <8 x i32> [[RDX_SHUF]]
@@ -39,7 +30,6 @@ define void @_Z2azv() local_unnamed_addr {
 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 undef
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef
 ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP4]], i32 [[OP_EXTRA]], i32 undef
-; CHECK-NEXT: [[DOTSROA_SPECULATED_8:%.*]] = select i1 [[CMP_I1_8]], i32 undef, i32 undef
 ; CHECK-NEXT: [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32 undef, i32 [[OP_EXTRA7]]
 ; CHECK-NEXT: [[CMP_I1_10:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_9]], undef
 ; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
new file mode 100644
index 0000000..a8752bc
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
@@ -0,0 +1,529 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s
+
+@k = external dso_local constant [8 x [4 x i32]], align 16
+@l = external dso_local global [366 x i32], align 16
+
+; Function Attrs: nofree norecurse noreturn nounwind writeonly
+define void @n() local_unnamed_addr #0 {
+; CHECK-LABEL: @n(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 0, i64 0), align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 0, i64 1) to <4 x i32>*), align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 1), align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 2), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 3), align 4
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 0), align 16
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 1), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 2), align 8
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 3), align 4
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 0), align 16
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 1), align 4
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 2), align 8
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 3), align 4
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0), align 16
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 1), align 4
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 2), align 8
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 3), align 4
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 0), align 16
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 1), align 4
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 2), align 8
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 3), align 4
+; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 0), align 16
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 1), align 4
+; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 2), align 8
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 3), align 4
+; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 0), align 16
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 1), align 4
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 2), align 8
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 3), align 4
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[SPEC_SELECT8_3_7:%.*]], [[FOR_COND]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -183
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP30]], [[TMP0]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp slt i32 [[SUB]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[SUB]]
+; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[NEG]], i32 [[SUB]]
+; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP30]], i32 0
+; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 1
+; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP30]], i32 2
+; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP30]], i32 3
+; CHECK-NEXT: [[TMP37:%.*]] = sub <4 x i32> [[TMP36]], [[TMP1]]
+; CHECK-NEXT: [[TMP38:%.*]] = icmp slt <4 x i32> [[TMP37]], zeroinitializer
+; CHECK-NEXT: [[TMP39:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP37]]
+; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP38]], <4 x i32> [[TMP39]], <4 x i32> [[TMP37]]
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP40]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP40]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP40]], <4 x i32> [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp slt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
+; CHECK-NEXT: [[TMP42:%.*]] = icmp slt i32 [[TMP41]], [[TMP32]]
+; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP41]], i32 [[TMP32]]
+; CHECK-NEXT: [[TMP44:%.*]] = icmp slt i32 [[TMP43]], [[B_0]]
+; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP44]], i32 [[TMP43]], i32 [[B_0]]
+; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP30]], [[TMP2]]
+; CHECK-NEXT: [[TMP45:%.*]] = icmp slt i32 [[SUB_1_1]], 0
+; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]]
+; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
+; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP46]], [[OP_EXTRA]]
+; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], undef
+; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP46]], i32 [[OP_EXTRA]]
+; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]]
+; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_2_1]], 0
+; CHECK-NEXT: [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]]
+; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
+; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP48]], [[SPEC_SELECT8_1_1]]
+; CHECK-NEXT: [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP48]], i32 [[SPEC_SELECT8_1_1]]
+; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP30]], [[TMP4]]
+; CHECK-NEXT: [[TMP49:%.*]] = icmp slt i32 [[SUB_3_1]], 0
+; CHECK-NEXT: [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]]
+; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
+; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP50]], [[SPEC_SELECT8_2_1]]
+; CHECK-NEXT: [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]]
+; CHECK-NEXT: [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32
+; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP50]], i32 [[SPEC_SELECT8_2_1]]
+; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP30]], [[TMP5]]
+; CHECK-NEXT: [[TMP51:%.*]] = icmp slt i32 [[SUB_222]], 0
+; CHECK-NEXT: [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]]
+; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[NEG_223]], i32 [[SUB_222]]
+; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP52]], [[SPEC_SELECT8_3_1]]
+; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP52]], i32 [[SPEC_SELECT8_3_1]]
+; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP30]], [[TMP6]]
+; CHECK-NEXT: [[TMP53:%.*]] = icmp slt i32 [[SUB_1_2]], 0
+; CHECK-NEXT: [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]]
+; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
+; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP54]], [[SPEC_SELECT8_226]]
+; CHECK-NEXT: [[TMP55:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP54]], i32 [[SPEC_SELECT8_226]]
+; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP30]], [[TMP7]]
+; CHECK-NEXT: [[TMP56:%.*]] = icmp slt i32 [[SUB_2_2]], 0
+; CHECK-NEXT: [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]]
+; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
+; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_1_2]]
+; CHECK-NEXT: [[TMP58:%.*]] = or i1 [[CMP12_2_2]], [[TMP55]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP57]], i32 [[SPEC_SELECT8_1_2]]
+; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP30]], [[TMP8]]
+; CHECK-NEXT: [[TMP59:%.*]] = icmp slt i32 [[SUB_3_2]], 0
+; CHECK-NEXT: [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]]
+; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
+; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_2_2]]
+; CHECK-NEXT: [[TMP61:%.*]] = or i1 [[CMP12_3_2]], [[TMP58]]
+; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP61]], i32 2, i32 [[SPEC_SELECT_3_1]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP60]], i32 [[SPEC_SELECT8_2_2]]
+; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP30]], [[TMP9]]
+; CHECK-NEXT: [[TMP62:%.*]] = icmp slt i32 [[SUB_328]], 0
+; CHECK-NEXT: [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]]
+; CHECK-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[NEG_329]], i32 [[SUB_328]]
+; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP63]], [[SPEC_SELECT8_3_2]]
+; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP63]], i32 [[SPEC_SELECT8_3_2]]
+; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP30]], [[TMP10]]
+; CHECK-NEXT: [[TMP64:%.*]] = icmp slt i32 [[SUB_1_3]], 0
+; CHECK-NEXT: [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]]
+; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
+; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP65]], [[SPEC_SELECT8_332]]
+; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP65]], i32 [[SPEC_SELECT8_332]]
+; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP30]], [[TMP11]]
+; CHECK-NEXT: [[TMP67:%.*]] = icmp slt i32 [[SUB_2_3]], 0
+; CHECK-NEXT: [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]]
+; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
+; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_1_3]]
+; CHECK-NEXT: [[TMP69:%.*]] = or i1 [[CMP12_2_3]], [[TMP66]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP68]], i32 [[SPEC_SELECT8_1_3]]
+; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP30]], [[TMP12]]
+; CHECK-NEXT: [[TMP70:%.*]] = icmp slt i32 [[SUB_3_3]], 0
+; CHECK-NEXT: [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]]
+; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
+; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP71]], [[SPEC_SELECT8_2_3]]
+; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[CMP12_3_3]], [[TMP69]]
+; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP72]], i32 3, i32 [[SPEC_SELECT_3_2]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP71]], i32 [[SPEC_SELECT8_2_3]]
+; CHECK-NEXT: [[SUB_4:%.*]] = sub i32 [[TMP30]], [[TMP13]]
+; CHECK-NEXT: [[TMP73:%.*]] = icmp slt i32 [[SUB_4]], 0
+; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[SUB_4]]
+; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[NEG_4]], i32 [[SUB_4]]
+; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP74]], [[SPEC_SELECT8_3_3]]
+; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP74]], i32 [[SPEC_SELECT8_3_3]]
+; CHECK-NEXT: [[SUB_1_4:%.*]] = sub i32 [[TMP30]], [[TMP14]]
+; CHECK-NEXT: [[TMP75:%.*]] = icmp slt i32 [[SUB_1_4]], 0
+; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[SUB_1_4]]
+; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 [[NEG_1_4]], i32 [[SUB_1_4]]
+; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP76]], [[SPEC_SELECT8_4]]
+; CHECK-NEXT: [[TMP77:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP76]], i32 [[SPEC_SELECT8_4]]
+; CHECK-NEXT: [[SUB_2_4:%.*]] = sub i32 [[TMP30]], [[TMP15]]
+; CHECK-NEXT: [[TMP78:%.*]] = icmp slt i32 [[SUB_2_4]], 0
+; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[SUB_2_4]]
+; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_2_4]], i32 [[SUB_2_4]]
+; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_1_4]]
+; CHECK-NEXT: [[TMP80:%.*]] = or i1 [[CMP12_2_4]], [[TMP77]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP79]], i32 [[SPEC_SELECT8_1_4]]
+; CHECK-NEXT: [[SUB_3_4:%.*]] = sub i32 [[TMP30]], [[TMP16]]
+; CHECK-NEXT: [[TMP81:%.*]] = icmp slt i32 [[SUB_3_4]], 0
+; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[SUB_3_4]]
+; CHECK-NEXT: [[TMP82:%.*]] = select i1 [[TMP81]], i32 [[NEG_3_4]], i32 [[SUB_3_4]]
+; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP82]], [[SPEC_SELECT8_2_4]]
+; CHECK-NEXT: [[TMP83:%.*]] = or i1 [[CMP12_3_4]], [[TMP80]]
+; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP83]], i32 4, i32 [[SPEC_SELECT_3_3]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP82]], i32 [[SPEC_SELECT8_2_4]]
+; CHECK-NEXT: [[SUB_5:%.*]] = sub i32 [[TMP30]], [[TMP17]]
+; CHECK-NEXT: [[TMP84:%.*]] = icmp slt i32 [[SUB_5]], 0
+; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[SUB_5]]
+; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP84]], i32 [[NEG_5]], i32 [[SUB_5]]
+; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP85]], [[SPEC_SELECT8_3_4]]
+; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP85]], i32 [[SPEC_SELECT8_3_4]]
+; CHECK-NEXT: [[SUB_1_5:%.*]] = sub i32 [[TMP30]], [[TMP18]]
+; CHECK-NEXT: [[TMP86:%.*]] = icmp slt i32 [[SUB_1_5]], 0
+; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[SUB_1_5]]
+; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_1_5]], i32 [[SUB_1_5]]
+; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_5]]
+; CHECK-NEXT: [[TMP88:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP87]], i32 [[SPEC_SELECT8_5]]
+; CHECK-NEXT: [[SUB_2_5:%.*]] = sub i32 [[TMP30]], [[TMP19]]
+; CHECK-NEXT: [[TMP89:%.*]] = icmp slt i32 [[SUB_2_5]], 0
+; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[SUB_2_5]]
+; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_2_5]], i32 [[SUB_2_5]]
+; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_1_5]]
+; CHECK-NEXT: [[TMP91:%.*]] = or i1 [[CMP12_2_5]], [[TMP88]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP90]], i32 [[SPEC_SELECT8_1_5]]
+; CHECK-NEXT: [[SUB_3_5:%.*]] = sub i32 [[TMP30]], [[TMP20]]
+; CHECK-NEXT: [[TMP92:%.*]] = icmp slt i32 [[SUB_3_5]], 0
+; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[SUB_3_5]]
+; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[NEG_3_5]], i32 [[SUB_3_5]]
+; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP93]], [[SPEC_SELECT8_2_5]]
+; CHECK-NEXT: [[TMP94:%.*]] = or i1 [[CMP12_3_5]], [[TMP91]]
+; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP94]], i32 5, i32 [[SPEC_SELECT_3_4]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP93]], i32 [[SPEC_SELECT8_2_5]]
+; CHECK-NEXT: [[SUB_6:%.*]] = sub i32 [[TMP30]], [[TMP21]]
+; CHECK-NEXT: [[TMP95:%.*]] = icmp slt i32 [[SUB_6]], 0
+; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[SUB_6]]
+; CHECK-NEXT: [[TMP96:%.*]] = select i1 [[TMP95]], i32 [[NEG_6]], i32 [[SUB_6]]
+; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP96]], [[SPEC_SELECT8_3_5]]
+; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP96]], i32 [[SPEC_SELECT8_3_5]]
+; CHECK-NEXT: [[SUB_1_6:%.*]] = sub i32 [[TMP30]], [[TMP22]]
+; CHECK-NEXT: [[TMP97:%.*]] = icmp slt i32 [[SUB_1_6]], 0
+; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[SUB_1_6]]
+; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_1_6]], i32 [[SUB_1_6]]
+; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_6]]
+; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_6]]
+; CHECK-NEXT: [[SUB_2_6:%.*]] = sub i32 [[TMP30]], [[TMP23]]
+; CHECK-NEXT: [[TMP100:%.*]] = icmp slt i32 [[SUB_2_6]], 0
+; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[SUB_2_6]]
[[SUB_2_6]] +; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_2_6]], i32 [[SUB_2_6]] +; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[TMP102:%.*]] = or i1 [[CMP12_2_6]], [[TMP99]] +; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP101]], i32 [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[SUB_3_6:%.*]] = sub i32 [[TMP30]], [[TMP24]] +; CHECK-NEXT: [[TMP103:%.*]] = icmp slt i32 [[SUB_3_6]], 0 +; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[SUB_3_6]] +; CHECK-NEXT: [[TMP104:%.*]] = select i1 [[TMP103]], i32 [[NEG_3_6]], i32 [[SUB_3_6]] +; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP104]], [[SPEC_SELECT8_2_6]] +; CHECK-NEXT: [[TMP105:%.*]] = or i1 [[CMP12_3_6]], [[TMP102]] +; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP105]], i32 6, i32 [[SPEC_SELECT_3_5]] +; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP104]], i32 [[SPEC_SELECT8_2_6]] +; CHECK-NEXT: [[SUB_7:%.*]] = sub i32 [[TMP30]], [[TMP25]] +; CHECK-NEXT: [[TMP106:%.*]] = icmp slt i32 [[SUB_7]], 0 +; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[SUB_7]] +; CHECK-NEXT: [[TMP107:%.*]] = select i1 [[TMP106]], i32 [[NEG_7]], i32 [[SUB_7]] +; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP107]], [[SPEC_SELECT8_3_6]] +; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP107]], i32 [[SPEC_SELECT8_3_6]] +; CHECK-NEXT: [[SUB_1_7:%.*]] = sub i32 [[TMP30]], [[TMP26]] +; CHECK-NEXT: [[TMP108:%.*]] = icmp slt i32 [[SUB_1_7]], 0 +; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[SUB_1_7]] +; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_1_7]], i32 [[SUB_1_7]] +; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]] +; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[SUB_2_7:%.*]] = sub i32 [[TMP30]], [[TMP27]] +; CHECK-NEXT: [[TMP111:%.*]] = icmp slt i32 [[SUB_2_7]], 0 +; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[SUB_2_7]] +; CHECK-NEXT: [[TMP112:%.*]] = select i1 [[TMP111]], i32 [[NEG_2_7]], i32 [[SUB_2_7]] +; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP112]], [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[TMP113:%.*]] = or i1 [[CMP12_2_7]], [[TMP110]] +; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP112]], i32 [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[SUB_3_7:%.*]] = sub i32 [[TMP30]], [[TMP28]] +; CHECK-NEXT: [[TMP114:%.*]] = icmp slt i32 [[SUB_3_7]], 0 +; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[SUB_3_7]] +; CHECK-NEXT: [[TMP115:%.*]] = select i1 [[TMP114]], i32 [[NEG_3_7]], i32 [[SUB_3_7]] +; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP115]], [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[TMP116:%.*]] = or i1 [[CMP12_3_7]], [[TMP113]] +; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP116]], i32 7, i32 [[SPEC_SELECT_3_6]] +; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP115]], i32 [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[K:%.*]] = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: store i32 [[SPEC_SELECT_3_7]], i32* [[K]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br label [[FOR_COND]] +; +entry: + %0 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 0, i64 0), align 16 + %1 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 
0, i64 1), align 4 + %2 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 0, i64 2), align 8 + %3 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 0, i64 3), align 4 + %4 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 0), align 16 + %5 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 1), align 4 + %6 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 2), align 8 + %7 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 3), align 4 + %8 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 0), align 16 + %9 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 1), align 4 + %10 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 2), align 8 + %11 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 3), align 4 + %12 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 0), align 16 + %13 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 1), align 4 + %14 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 2), align 8 + %15 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 3), align 4 + %16 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0), align 16 + %17 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 1), align 4 + %18 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 2), align 8 + %19 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 3), align 4 + %20 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 0), align 16 + %21 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 1), align 4 + %22 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 2), align 8 + %23 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 3), align 4 + %24 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 0), align 16 + %25 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 1), align 4 + %26 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 2), align 8 + %27 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 3), align 4 + %28 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 0), align 16 + %29 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 1), align 4 + %30 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 2), align 8 + %31 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 3), align 4 + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %indvars.iv = phi i64 
[ %indvars.iv.next, %for.cond ], [ 0, %entry ] + %b.0 = phi i32 [ %spec.select8.3.7, %for.cond ], [ undef, %entry ] + %32 = trunc i64 %indvars.iv to i32 + %33 = add i32 %32, -183 + %sub = sub i32 %33, %0 + %34 = icmp slt i32 %sub, 0 + %neg = sub nsw i32 0, %sub + %35 = select i1 %34, i32 %neg, i32 %sub + %cmp12 = icmp slt i32 %35, %b.0 + %spec.select8 = select i1 %cmp12, i32 %35, i32 %b.0 + %sub.1 = sub i32 %33, %1 + %36 = icmp slt i32 %sub.1, 0 + %neg.1 = sub nsw i32 0, %sub.1 + %37 = select i1 %36, i32 %neg.1, i32 %sub.1 + %cmp12.1 = icmp slt i32 %37, %spec.select8 + %spec.select8.1 = select i1 %cmp12.1, i32 %37, i32 %spec.select8 + %sub.2 = sub i32 %33, %2 + %38 = icmp slt i32 %sub.2, 0 + %neg.2 = sub nsw i32 0, %sub.2 + %39 = select i1 %38, i32 %neg.2, i32 %sub.2 + %cmp12.2 = icmp slt i32 %39, %spec.select8.1 + %spec.select8.2 = select i1 %cmp12.2, i32 %39, i32 %spec.select8.1 + %sub.3 = sub i32 %33, %3 + %40 = icmp slt i32 %sub.3, 0 + %neg.3 = sub nsw i32 0, %sub.3 + %41 = select i1 %40, i32 %neg.3, i32 %sub.3 + %cmp12.3 = icmp slt i32 %41, %spec.select8.2 + %spec.select8.3 = select i1 %cmp12.3, i32 %41, i32 %spec.select8.2 + %sub.116 = sub i32 %33, %4 + %42 = icmp slt i32 %sub.116, 0 + %neg.117 = sub nsw i32 0, %sub.116 + %43 = select i1 %42, i32 %neg.117, i32 %sub.116 + %cmp12.118 = icmp slt i32 %43, %spec.select8.3 + %spec.select8.120 = select i1 %cmp12.118, i32 %43, i32 %spec.select8.3 + %sub.1.1 = sub i32 %33, %5 + %44 = icmp slt i32 %sub.1.1, 0 + %neg.1.1 = sub nsw i32 0, %sub.1.1 + %45 = select i1 %44, i32 %neg.1.1, i32 %sub.1.1 + %cmp12.1.1 = icmp slt i32 %45, %spec.select8.120 + %narrow = or i1 %cmp12.1.1, %cmp12.118 + %spec.select8.1.1 = select i1 %cmp12.1.1, i32 %45, i32 %spec.select8.120 + %sub.2.1 = sub i32 %33, %6 + %46 = icmp slt i32 %sub.2.1, 0 + %neg.2.1 = sub nsw i32 0, %sub.2.1 + %47 = select i1 %46, i32 %neg.2.1, i32 %sub.2.1 + %cmp12.2.1 = icmp slt i32 %47, %spec.select8.1.1 + %narrow34 = or i1 %cmp12.2.1, %narrow + %spec.select8.2.1 = select i1 %cmp12.2.1, i32 %47, i32 %spec.select8.1.1 + %sub.3.1 = sub i32 %33, %7 + %48 = icmp slt i32 %sub.3.1, 0 + %neg.3.1 = sub nsw i32 0, %sub.3.1 + %49 = select i1 %48, i32 %neg.3.1, i32 %sub.3.1 + %cmp12.3.1 = icmp slt i32 %49, %spec.select8.2.1 + %narrow35 = or i1 %cmp12.3.1, %narrow34 + %spec.select.3.1 = zext i1 %narrow35 to i32 + %spec.select8.3.1 = select i1 %cmp12.3.1, i32 %49, i32 %spec.select8.2.1 + %sub.222 = sub i32 %33, %8 + %50 = icmp slt i32 %sub.222, 0 + %neg.223 = sub nsw i32 0, %sub.222 + %51 = select i1 %50, i32 %neg.223, i32 %sub.222 + %cmp12.224 = icmp slt i32 %51, %spec.select8.3.1 + %spec.select8.226 = select i1 %cmp12.224, i32 %51, i32 %spec.select8.3.1 + %sub.1.2 = sub i32 %33, %9 + %52 = icmp slt i32 %sub.1.2, 0 + %neg.1.2 = sub nsw i32 0, %sub.1.2 + %53 = select i1 %52, i32 %neg.1.2, i32 %sub.1.2 + %cmp12.1.2 = icmp slt i32 %53, %spec.select8.226 + %54 = or i1 %cmp12.1.2, %cmp12.224 + %spec.select8.1.2 = select i1 %cmp12.1.2, i32 %53, i32 %spec.select8.226 + %sub.2.2 = sub i32 %33, %10 + %55 = icmp slt i32 %sub.2.2, 0 + %neg.2.2 = sub nsw i32 0, %sub.2.2 + %56 = select i1 %55, i32 %neg.2.2, i32 %sub.2.2 + %cmp12.2.2 = icmp slt i32 %56, %spec.select8.1.2 + %57 = or i1 %cmp12.2.2, %54 + %spec.select8.2.2 = select i1 %cmp12.2.2, i32 %56, i32 %spec.select8.1.2 + %sub.3.2 = sub i32 %33, %11 + %58 = icmp slt i32 %sub.3.2, 0 + %neg.3.2 = sub nsw i32 0, %sub.3.2 + %59 = select i1 %58, i32 %neg.3.2, i32 %sub.3.2 + %cmp12.3.2 = icmp slt i32 %59, %spec.select8.2.2 + %60 = or i1 %cmp12.3.2, %57 + %spec.select.3.2 
+  %spec.select8.3.2 = select i1 %cmp12.3.2, i32 %59, i32 %spec.select8.2.2
+  %sub.328 = sub i32 %33, %12
+  %61 = icmp slt i32 %sub.328, 0
+  %neg.329 = sub nsw i32 0, %sub.328
+  %62 = select i1 %61, i32 %neg.329, i32 %sub.328
+  %cmp12.330 = icmp slt i32 %62, %spec.select8.3.2
+  %spec.select8.332 = select i1 %cmp12.330, i32 %62, i32 %spec.select8.3.2
+  %sub.1.3 = sub i32 %33, %13
+  %63 = icmp slt i32 %sub.1.3, 0
+  %neg.1.3 = sub nsw i32 0, %sub.1.3
+  %64 = select i1 %63, i32 %neg.1.3, i32 %sub.1.3
+  %cmp12.1.3 = icmp slt i32 %64, %spec.select8.332
+  %65 = or i1 %cmp12.1.3, %cmp12.330
+  %spec.select8.1.3 = select i1 %cmp12.1.3, i32 %64, i32 %spec.select8.332
+  %sub.2.3 = sub i32 %33, %14
+  %66 = icmp slt i32 %sub.2.3, 0
+  %neg.2.3 = sub nsw i32 0, %sub.2.3
+  %67 = select i1 %66, i32 %neg.2.3, i32 %sub.2.3
+  %cmp12.2.3 = icmp slt i32 %67, %spec.select8.1.3
+  %68 = or i1 %cmp12.2.3, %65
+  %spec.select8.2.3 = select i1 %cmp12.2.3, i32 %67, i32 %spec.select8.1.3
+  %sub.3.3 = sub i32 %33, %15
+  %69 = icmp slt i32 %sub.3.3, 0
+  %neg.3.3 = sub nsw i32 0, %sub.3.3
+  %70 = select i1 %69, i32 %neg.3.3, i32 %sub.3.3
+  %cmp12.3.3 = icmp slt i32 %70, %spec.select8.2.3
+  %71 = or i1 %cmp12.3.3, %68
+  %spec.select.3.3 = select i1 %71, i32 3, i32 %spec.select.3.2
+  %spec.select8.3.3 = select i1 %cmp12.3.3, i32 %70, i32 %spec.select8.2.3
+  %sub.4 = sub i32 %33, %16
+  %72 = icmp slt i32 %sub.4, 0
+  %neg.4 = sub nsw i32 0, %sub.4
+  %73 = select i1 %72, i32 %neg.4, i32 %sub.4
+  %cmp12.4 = icmp slt i32 %73, %spec.select8.3.3
+  %spec.select8.4 = select i1 %cmp12.4, i32 %73, i32 %spec.select8.3.3
+  %sub.1.4 = sub i32 %33, %17
+  %74 = icmp slt i32 %sub.1.4, 0
+  %neg.1.4 = sub nsw i32 0, %sub.1.4
+  %75 = select i1 %74, i32 %neg.1.4, i32 %sub.1.4
+  %cmp12.1.4 = icmp slt i32 %75, %spec.select8.4
+  %76 = or i1 %cmp12.1.4, %cmp12.4
+  %spec.select8.1.4 = select i1 %cmp12.1.4, i32 %75, i32 %spec.select8.4
+  %sub.2.4 = sub i32 %33, %18
+  %77 = icmp slt i32 %sub.2.4, 0
+  %neg.2.4 = sub nsw i32 0, %sub.2.4
+  %78 = select i1 %77, i32 %neg.2.4, i32 %sub.2.4
+  %cmp12.2.4 = icmp slt i32 %78, %spec.select8.1.4
+  %79 = or i1 %cmp12.2.4, %76
+  %spec.select8.2.4 = select i1 %cmp12.2.4, i32 %78, i32 %spec.select8.1.4
+  %sub.3.4 = sub i32 %33, %19
+  %80 = icmp slt i32 %sub.3.4, 0
+  %neg.3.4 = sub nsw i32 0, %sub.3.4
+  %81 = select i1 %80, i32 %neg.3.4, i32 %sub.3.4
+  %cmp12.3.4 = icmp slt i32 %81, %spec.select8.2.4
+  %82 = or i1 %cmp12.3.4, %79
+  %spec.select.3.4 = select i1 %82, i32 4, i32 %spec.select.3.3
+  %spec.select8.3.4 = select i1 %cmp12.3.4, i32 %81, i32 %spec.select8.2.4
+  %sub.5 = sub i32 %33, %20
+  %83 = icmp slt i32 %sub.5, 0
+  %neg.5 = sub nsw i32 0, %sub.5
+  %84 = select i1 %83, i32 %neg.5, i32 %sub.5
+  %cmp12.5 = icmp slt i32 %84, %spec.select8.3.4
+  %spec.select8.5 = select i1 %cmp12.5, i32 %84, i32 %spec.select8.3.4
+  %sub.1.5 = sub i32 %33, %21
+  %85 = icmp slt i32 %sub.1.5, 0
+  %neg.1.5 = sub nsw i32 0, %sub.1.5
+  %86 = select i1 %85, i32 %neg.1.5, i32 %sub.1.5
+  %cmp12.1.5 = icmp slt i32 %86, %spec.select8.5
+  %87 = or i1 %cmp12.1.5, %cmp12.5
+  %spec.select8.1.5 = select i1 %cmp12.1.5, i32 %86, i32 %spec.select8.5
+  %sub.2.5 = sub i32 %33, %22
+  %88 = icmp slt i32 %sub.2.5, 0
+  %neg.2.5 = sub nsw i32 0, %sub.2.5
+  %89 = select i1 %88, i32 %neg.2.5, i32 %sub.2.5
+  %cmp12.2.5 = icmp slt i32 %89, %spec.select8.1.5
+  %90 = or i1 %cmp12.2.5, %87
+  %spec.select8.2.5 = select i1 %cmp12.2.5, i32 %89, i32 %spec.select8.1.5
+  %sub.3.5 = sub i32 %33, %23
+  %91 = icmp slt i32 %sub.3.5, 0
+  %neg.3.5 = sub nsw i32 0, %sub.3.5
+  %92 = select i1 %91, i32 %neg.3.5, i32 %sub.3.5
+  %cmp12.3.5 = icmp slt i32 %92, %spec.select8.2.5
+  %93 = or i1 %cmp12.3.5, %90
+  %spec.select.3.5 = select i1 %93, i32 5, i32 %spec.select.3.4
+  %spec.select8.3.5 = select i1 %cmp12.3.5, i32 %92, i32 %spec.select8.2.5
+  %sub.6 = sub i32 %33, %24
+  %94 = icmp slt i32 %sub.6, 0
+  %neg.6 = sub nsw i32 0, %sub.6
+  %95 = select i1 %94, i32 %neg.6, i32 %sub.6
+  %cmp12.6 = icmp slt i32 %95, %spec.select8.3.5
+  %spec.select8.6 = select i1 %cmp12.6, i32 %95, i32 %spec.select8.3.5
+  %sub.1.6 = sub i32 %33, %25
+  %96 = icmp slt i32 %sub.1.6, 0
+  %neg.1.6 = sub nsw i32 0, %sub.1.6
+  %97 = select i1 %96, i32 %neg.1.6, i32 %sub.1.6
+  %cmp12.1.6 = icmp slt i32 %97, %spec.select8.6
+  %98 = or i1 %cmp12.1.6, %cmp12.6
+  %spec.select8.1.6 = select i1 %cmp12.1.6, i32 %97, i32 %spec.select8.6
+  %sub.2.6 = sub i32 %33, %26
+  %99 = icmp slt i32 %sub.2.6, 0
+  %neg.2.6 = sub nsw i32 0, %sub.2.6
+  %100 = select i1 %99, i32 %neg.2.6, i32 %sub.2.6
+  %cmp12.2.6 = icmp slt i32 %100, %spec.select8.1.6
+  %101 = or i1 %cmp12.2.6, %98
+  %spec.select8.2.6 = select i1 %cmp12.2.6, i32 %100, i32 %spec.select8.1.6
+  %sub.3.6 = sub i32 %33, %27
+  %102 = icmp slt i32 %sub.3.6, 0
+  %neg.3.6 = sub nsw i32 0, %sub.3.6
+  %103 = select i1 %102, i32 %neg.3.6, i32 %sub.3.6
+  %cmp12.3.6 = icmp slt i32 %103, %spec.select8.2.6
+  %104 = or i1 %cmp12.3.6, %101
+  %spec.select.3.6 = select i1 %104, i32 6, i32 %spec.select.3.5
+  %spec.select8.3.6 = select i1 %cmp12.3.6, i32 %103, i32 %spec.select8.2.6
+  %sub.7 = sub i32 %33, %28
+  %105 = icmp slt i32 %sub.7, 0
+  %neg.7 = sub nsw i32 0, %sub.7
+  %106 = select i1 %105, i32 %neg.7, i32 %sub.7
+  %cmp12.7 = icmp slt i32 %106, %spec.select8.3.6
+  %spec.select8.7 = select i1 %cmp12.7, i32 %106, i32 %spec.select8.3.6
+  %sub.1.7 = sub i32 %33, %29
+  %107 = icmp slt i32 %sub.1.7, 0
+  %neg.1.7 = sub nsw i32 0, %sub.1.7
+  %108 = select i1 %107, i32 %neg.1.7, i32 %sub.1.7
+  %cmp12.1.7 = icmp slt i32 %108, %spec.select8.7
+  %109 = or i1 %cmp12.1.7, %cmp12.7
+  %spec.select8.1.7 = select i1 %cmp12.1.7, i32 %108, i32 %spec.select8.7
+  %sub.2.7 = sub i32 %33, %30
+  %110 = icmp slt i32 %sub.2.7, 0
+  %neg.2.7 = sub nsw i32 0, %sub.2.7
+  %111 = select i1 %110, i32 %neg.2.7, i32 %sub.2.7
+  %cmp12.2.7 = icmp slt i32 %111, %spec.select8.1.7
+  %112 = or i1 %cmp12.2.7, %109
+  %spec.select8.2.7 = select i1 %cmp12.2.7, i32 %111, i32 %spec.select8.1.7
+  %sub.3.7 = sub i32 %33, %31
+  %113 = icmp slt i32 %sub.3.7, 0
+  %neg.3.7 = sub nsw i32 0, %sub.3.7
+  %114 = select i1 %113, i32 %neg.3.7, i32 %sub.3.7
+  %cmp12.3.7 = icmp slt i32 %114, %spec.select8.2.7
+  %115 = or i1 %cmp12.3.7, %112
+  %spec.select.3.7 = select i1 %115, i32 7, i32 %spec.select.3.6
+  %spec.select8.3.7 = select i1 %cmp12.3.7, i32 %114, i32 %spec.select8.2.7
+  %k = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 %indvars.iv
+  store i32 %spec.select.3.7, i32* %k, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  br label %for.cond
+}
+
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
index 889bba8..779d526 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
@@ -18,19 +18,6 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
 ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
-; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
-; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef
-; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
-; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef
-; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
-; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef
-; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
-; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef
-; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
-; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef
-; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
@@ -41,7 +28,6 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
-; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef
 ; CHECK-NEXT:    ret i32 [[TMP11]]
 ;
 entry:
@@ -92,19 +78,6 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
 ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
-; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
-; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef
-; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
-; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef
-; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
-; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef
-; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
-; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef
-; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
-; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef
-; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
@@ -115,7 +88,6 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
-; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef
 ; CHECK-NEXT:    ret i32 [[TMP11]]
 ;
 entry:
@@ -170,19 +142,6 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
 ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
-; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
-; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef
-; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
-; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef
-; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
-; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef
-; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
-; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef
-; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
-; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef
-; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
@@ -193,7 +152,6 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
-; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef
 ; CHECK-NEXT:    ret i32 [[TMP11]]
 ;
 entry:
-- 
2.7.4