From 53d523c9eb1fba3714d39802fef27eb75ed35baa Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 7 Aug 2017 14:51:52 +0000 Subject: [PATCH] Revert "[SLP] General improvements of SLP vectorization process." This reverts commit r310255. llvm-svn: 310257 --- .../llvm/Transforms/Vectorize/SLPVectorizer.h | 13 -- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 215 ++++++++++----------- .../SLPVectorizer/AArch64/gather-root.ll | 100 +++++----- .../Transforms/SLPVectorizer/X86/horizontal.ll | 64 +++--- .../X86/insert-element-build-vector.ll | 40 ++-- 5 files changed, 206 insertions(+), 226 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h index a4fa3f3..f7c8cd4 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -100,19 +100,6 @@ private: slpvectorizer::BoUpSLP &R, TargetTransformInfo *TTI); - /// Try to vectorize trees that start at insertvalue instructions. - bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB, - slpvectorizer::BoUpSLP &R); - /// Try to vectorize trees that start at insertelement instructions. - bool vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB, - slpvectorizer::BoUpSLP &R); - /// Try to vectorize trees that start at compare instructions. - bool vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, slpvectorizer::BoUpSLP &R); - /// Tries to vectorize constructs started from CmpInst, InsertValueInst or - /// InsertElementInst instructions. - bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, - BasicBlock *BB, slpvectorizer::BoUpSLP &R); - /// \brief Scan the basic block and look for patterns that are likely to start /// a vectorization chain. bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3398043..6e2865e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4387,7 +4387,7 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) { if (!I) return false; - if (!isa(I) && !isa(I)) + if (!isa(I)) return false; Value *P = I->getParent(); @@ -4925,30 +4925,39 @@ private: /// %rb = insertelement <4 x float> %ra, float %s1, i32 1 /// %rc = insertelement <4 x float> %rb, float %s2, i32 2 /// %rd = insertelement <4 x float> %rc, float %s3, i32 3 -/// starting from the last insertelement instruction. /// /// Returns true if it matches /// -static bool findBuildVector(InsertElementInst *LastInsertElem, +static bool findBuildVector(InsertElementInst *FirstInsertElem, SmallVectorImpl &BuildVector, SmallVectorImpl &BuildVectorOpds) { - Value *V = nullptr; - do { - BuildVector.push_back(LastInsertElem); - BuildVectorOpds.push_back(LastInsertElem->getOperand(1)); - V = LastInsertElem->getOperand(0); - if (isa(V)) - break; - LastInsertElem = dyn_cast(V); - if (!LastInsertElem || !LastInsertElem->hasOneUse()) + if (!isa(FirstInsertElem->getOperand(0))) + return false; + + InsertElementInst *IE = FirstInsertElem; + while (true) { + BuildVector.push_back(IE); + BuildVectorOpds.push_back(IE->getOperand(1)); + + if (IE->use_empty()) return false; - } while (true); - std::reverse(BuildVector.begin(), BuildVector.end()); - std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); - return true; + + InsertElementInst *NextUse = dyn_cast(IE->user_back()); + if (!NextUse) + return true; + + // If this isn't the final use, make sure the next insertelement is the only + // use. It's OK if the final constructed vector is used multiple times + if (!IE->hasOneUse()) + return false; + + IE = NextUse; + } + + return false; } -/// \brief Like findBuildVector, but looks for construction of aggregate. +/// \brief Like findBuildVector, but looks backwards for construction of aggregate. /// /// \return true if it matches. static bool findBuildAggregate(InsertValueInst *IV, @@ -5133,64 +5142,6 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V, ExtraVectorization); } -bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI, - BasicBlock *BB, BoUpSLP &R) { - const DataLayout &DL = BB->getModule()->getDataLayout(); - if (!R.canMapToVector(IVI->getType(), DL)) - return false; - - SmallVector BuildVector; - SmallVector BuildVectorOpds; - if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds)) - return false; - - DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n"); - return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false); -} - -bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, - BasicBlock *BB, BoUpSLP &R) { - SmallVector BuildVector; - SmallVector BuildVectorOpds; - if (!findBuildVector(IEI, BuildVector, BuildVectorOpds)) - return false; - - // Vectorize starting with the build vector operands ignoring the BuildVector - // instructions for the purpose of scheduling and user extraction. - return tryToVectorizeList(BuildVectorOpds, R, BuildVector); -} - -bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, - BoUpSLP &R) { - if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) - return true; - - bool OpsChanged = false; - for (int Idx = 0; Idx < 2; ++Idx) { - OpsChanged |= - vectorizeRootInstruction(nullptr, CI->getOperand(Idx), BB, R, TTI); - } - return OpsChanged; -} - -bool SLPVectorizerPass::vectorizeSimpleInstructions( - SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R) { - bool OpsChanged = false; - for (auto &VH : reverse(Instructions)) { - auto *I = dyn_cast_or_null(VH); - if (!I) - continue; - if (auto *LastInsertValue = dyn_cast(I)) - OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R); - else if (auto *LastInsertElem = dyn_cast(I)) - OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R); - else if (auto *CI = dyn_cast(I)) - OpsChanged |= vectorizeCmpInst(CI, BB, R); - } - Instructions.clear(); - return OpsChanged; -} - bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool Changed = false; SmallVector Incoming; @@ -5250,21 +5201,10 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { VisitedInstrs.clear(); - SmallVector PostProcessInstructions; - SmallDenseSet KeyNodes; for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) { // We may go through BB multiple times so skip the one we have checked. - if (!VisitedInstrs.insert(&*it).second) { - if (it->use_empty() && KeyNodes.count(&*it) > 0 && - vectorizeSimpleInstructions(PostProcessInstructions, BB, R)) { - // We would like to start over since some instructions are deleted - // and the iterator may become invalid value. - Changed = true; - it = BB->begin(); - e = BB->end(); - } + if (!VisitedInstrs.insert(&*it).second) continue; - } if (isa(it)) continue; @@ -5286,40 +5226,97 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { continue; } - // Ran into an instruction without users, like terminator, or function call - // with ignored return value, store. Ignore unused instructions (basing on - // instruction type, except for CallInst and InvokeInst). - if (it->use_empty() && (it->getType()->isVoidTy() || isa(it) || - isa(it))) { - KeyNodes.insert(&*it); - bool OpsChanged = false; - if (ShouldStartVectorizeHorAtStore || !isa(it)) { - for (auto *V : it->operand_values()) { - // Try to match and vectorize a horizontal reduction. - OpsChanged |= vectorizeRootInstruction(nullptr, V, BB, R, TTI); + if (ShouldStartVectorizeHorAtStore) { + if (StoreInst *SI = dyn_cast(it)) { + // Try to match and vectorize a horizontal reduction. + if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R, + TTI)) { + Changed = true; + it = BB->begin(); + e = BB->end(); + continue; } } - // Start vectorization of post-process list of instructions from the - // top-tree instructions to try to vectorize as many instructions as - // possible. - OpsChanged |= vectorizeSimpleInstructions(PostProcessInstructions, BB, R); - if (OpsChanged) { + } + + // Try to vectorize horizontal reductions feeding into a return. + if (ReturnInst *RI = dyn_cast(it)) { + if (RI->getNumOperands() != 0) { + // Try to match and vectorize a horizontal reduction. + if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) { + Changed = true; + it = BB->begin(); + e = BB->end(); + continue; + } + } + } + + // Try to vectorize trees that start at compare instructions. + if (CmpInst *CI = dyn_cast(it)) { + if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { + Changed = true; // We would like to start over since some instructions are deleted // and the iterator may become invalid value. - Changed = true; it = BB->begin(); e = BB->end(); continue; } + + for (int I = 0; I < 2; ++I) { + if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) { + Changed = true; + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + it = BB->begin(); + e = BB->end(); + break; + } + } + continue; + } + + // Try to vectorize trees that start at insertelement instructions. + if (InsertElementInst *FirstInsertElem = dyn_cast(it)) { + SmallVector BuildVector; + SmallVector BuildVectorOpds; + if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds)) + continue; + + // Vectorize starting with the build vector operands ignoring the + // BuildVector instructions for the purpose of scheduling and user + // extraction. + if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) { + Changed = true; + it = BB->begin(); + e = BB->end(); + } + + continue; } - if (isa(it) || isa(it) || - isa(it)) - PostProcessInstructions.push_back(&*it); + // Try to vectorize trees that start at insertvalue instructions feeding into + // a store. + if (StoreInst *SI = dyn_cast(it)) { + if (InsertValueInst *LastInsertValue = dyn_cast(SI->getValueOperand())) { + const DataLayout &DL = BB->getModule()->getDataLayout(); + if (R.canMapToVector(SI->getValueOperand()->getType(), DL)) { + SmallVector BuildVector; + SmallVector BuildVectorOpds; + if (!findBuildAggregate(LastInsertValue, BuildVector, BuildVectorOpds)) + continue; + DEBUG(dbgs() << "SLP: store of array mappable to vector: " << *SI << "\n"); + if (tryToVectorizeList(BuildVectorOpds, R, BuildVector, false)) { + Changed = true; + it = BB->begin(); + e = BB->end(); + } + continue; + } + } + } } - assert(PostProcessInstructions.empty() && - "Not all instruction were processed."); return Changed; } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll index dda8a0c..0ea08b7 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -31,8 +31,10 @@ define void @PR28330(i32 %n) { ; ; GATHER-LABEL: @PR28330( ; GATHER-NEXT: entry: -; GATHER-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1 -; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer +; GATHER-NEXT: [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 +; GATHER-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; GATHER-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 +; GATHER-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0 ; GATHER-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 ; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 ; GATHER-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 @@ -48,11 +50,10 @@ define void @PR28330(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> -; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 -; GATHER-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP3]] -; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 -; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]] +; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80 +; GATHER-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP19]] +; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80 +; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]] ; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80 ; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]] ; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80 @@ -64,16 +65,16 @@ define void @PR28330(i32 %n) { ; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 ; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]] ; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0 -; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1 -; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2 -; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3 -; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4 -; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5 -; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6 -; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7 -; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]]) -; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], [[TMP17]] +; GATHER-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 +; GATHER-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP21]], i32 1 +; GATHER-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP23]], i32 2 +; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP25]], i32 3 +; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP27]], i32 4 +; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP29]], i32 5 +; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP31]], i32 6 +; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP33]], i32 7 +; GATHER-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP7]]) +; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], [[TMP17]] ; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; @@ -179,8 +180,10 @@ define void @PR32038(i32 %n) { ; ; GATHER-LABEL: @PR32038( ; GATHER-NEXT: entry: -; GATHER-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1 -; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer +; GATHER-NEXT: [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 +; GATHER-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; GATHER-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 +; GATHER-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0 ; GATHER-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 ; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 ; GATHER-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 @@ -196,11 +199,10 @@ define void @PR32038(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> -; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 -; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP3]] -; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 -; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]] +; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80 +; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP19]] +; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80 +; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]] ; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80 ; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]] ; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80 @@ -212,27 +214,29 @@ define void @PR32038(i32 %n) { ; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 ; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]] ; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0 -; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1 -; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2 -; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3 -; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4 -; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5 -; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6 -; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7 -; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]]) -; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], -5 +; GATHER-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 +; GATHER-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP21]], i32 1 +; GATHER-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP23]], i32 2 +; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP25]], i32 3 +; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP27]], i32 4 +; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP29]], i32 5 +; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP31]], i32 6 +; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP33]], i32 7 +; GATHER-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP7]]) +; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], -5 ; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR32038( ; MAX-COST-NEXT: entry: -; MAX-COST-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1 -; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer +; MAX-COST-NEXT: [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 +; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; MAX-COST-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 +; MAX-COST-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0 ; MAX-COST-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 -; MAX-COST-NEXT: [[TMPP5:%.*]] = icmp eq i8 [[TMP4]], 0 +; MAX-COST-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 ; MAX-COST-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 -; MAX-COST-NEXT: [[TMPP7:%.*]] = icmp eq i8 [[TMP6]], 0 +; MAX-COST-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0 ; MAX-COST-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 ; MAX-COST-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0 ; MAX-COST-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 @@ -241,16 +245,14 @@ define void @PR32038(i32 %n) { ; MAX-COST-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0 ; MAX-COST-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 ; MAX-COST-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0 +; MAX-COST-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> undef, i1 [[TMP1]], i32 0 +; MAX-COST-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> [[TMP0]], i1 [[TMP3]], i32 1 +; MAX-COST-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> [[TMP1]], i1 [[TMP5]], i32 2 +; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> [[TMP2]], i1 [[TMP7]], i32 3 ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: ; MAX-COST-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> undef, i1 [[TMP2]], i32 0 -; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[TMP4]], i32 1 -; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[TMPP5]], i32 2 -; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMPP7]], i32 3 -; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> , <4 x i32> +; MAX-COST-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> , <4 x i32> ; MAX-COST-NEXT: [[TMP20:%.*]] = add i32 -5, undef ; MAX-COST-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef ; MAX-COST-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef @@ -258,10 +260,10 @@ define void @PR32038(i32 %n) { ; MAX-COST-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]] ; MAX-COST-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP8]]) -; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP27]] -; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[TMP29]] -; MAX-COST-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP11]], -5 +; MAX-COST-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP4]]) +; MAX-COST-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[TMP27]] +; MAX-COST-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP29]] +; MAX-COST-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP7]], -5 ; MAX-COST-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]] ; MAX-COST-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[BIN_EXTRA]], [[TMP31]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index dd08ef28..080f850 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -817,22 +817,22 @@ declare i32 @foobar(i32) define void @i32_red_call(i32 %val) { ; CHECK-LABEL: @i32_red_call( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]] -; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 +; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 +; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 +; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 +; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 +; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]] +; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[ADD_6]]) ; CHECK-NEXT: ret void ; entry: @@ -858,22 +858,22 @@ entry: define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 { ; CHECK-LABEL: @i32_red_invoke( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]] -; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 +; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 +; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 +; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 +; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 +; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]] +; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[ADD_6]]) ; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] ; CHECK: exception: ; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 46386e8..9e4f503 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -303,30 +303,24 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 +; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C3]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0 -; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP11]], i32 1 -; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP18]], i32 1 -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i32 0 -; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP19]], i32 2 -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i32 1 -; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP20]], i32 3 +; CHECK-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]] +; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B3]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] +; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0 +; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 +; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 +; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP10]], i32 3 ; CHECK-NEXT: ret <4 x float> [[RD]] ; ; ZEROTHRESH-LABEL: @simple_select_no_users( -- 2.7.4