From a36bc873a269dca0c5399d72bfdd42d3ddc72671 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 Aug 2021 07:27:03 -0700 Subject: [PATCH] [SLP]No need to schedule/check parent for extract{element/value} instruction. The extractelement/extractvalue instructions are not required to be scheduled since they only depend on the source vector/aggregate (with constant indices); the same applies to the parent basic block checks. Improves compile time and saves scheduling budget. Differential Revision: https://reviews.llvm.org/D108703 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 46 ++++++++++++++++------ .../SLPVectorizer/X86/remark_extract_broadcast.ll | 28 ++++--------- 2 files changed, 42 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1520a70..f68c677 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -200,12 +200,39 @@ static bool isValidElementType(Type *Ty) { !Ty->isPPC_FP128Ty(); } +/// \returns True if the value is a constant (but not globals/constant +/// expressions). +static bool isConstant(Value *V) { + return isa(V) && !isa(V) && !isa(V); +} + +/// Checks if \p V is one of vector-like instructions, i.e. undef, +/// insertelement/extractelement with constant indices for fixed vector type or +/// extractvalue instruction. +static bool isVectorLikeInstWithConstOps(Value *V) { + if (!isa(V) && + !isa(V)) + return false; + auto *I = dyn_cast(V); + if (!I || isa(I)) + return true; + if (!isa(I->getOperand(0)->getType())) + return false; + if (isa(I)) + return isConstant(I->getOperand(1)); + assert(isa(V) && "Expected only insertelement."); + return isConstant(I->getOperand(2)); +} + /// \returns true if all of the instructions in \p VL are in the same block or /// false otherwise.
static bool allSameBlock(ArrayRef VL) { Instruction *I0 = dyn_cast(VL[0]); if (!I0) return false; + if (all_of(VL, isVectorLikeInstWithConstOps)) + return true; + BasicBlock *BB = I0->getParent(); for (int I = 1, E = VL.size(); I < E; I++) { auto *II = dyn_cast(VL[I]); @@ -218,12 +245,6 @@ static bool allSameBlock(ArrayRef VL) { return true; } -/// \returns True if the value is a constant (but not globals/constant -/// expressions). -static bool isConstant(Value *V) { - return isa(V) && !isa(V) && !isa(V); -} - /// \returns True if all of the values in \p VL are constants (but not /// globals/constant expressions). static bool allConstant(ArrayRef VL) { @@ -5932,7 +5953,9 @@ void BoUpSLP::optimizeGatherSequence() { Optional BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, const InstructionsState &S) { - if (isa(S.OpValue) || isa(S.OpValue)) + // No need to schedule PHIs, insertelement, extractelement and extractvalue + // instructions. + if (isa(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue)) return nullptr; // Initialize the instruction bundle. 
@@ -6028,7 +6051,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, Value *OpValue) { - if (isa(OpValue) || isa(OpValue)) + if (isa(OpValue) || isVectorLikeInstWithConstOps(OpValue)) return; ScheduleData *Bundle = getScheduleData(OpValue); @@ -6068,8 +6091,9 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, return true; Instruction *I = dyn_cast(V); assert(I && "bundle member must be an instruction"); - assert(!isa(I) && !isa(I) && - "phi nodes/insertelements don't need to be scheduled"); + assert(!isa(I) && !isVectorLikeInstWithConstOps(I) && + "phi nodes/insertelements/extractelements/extractvalues don't need to " + "be scheduled"); auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool { ScheduleData *ISD = getScheduleData(I); if (!ISD) @@ -6339,7 +6363,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; I = I->getNextNode()) { BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) { - assert((isa(SD->Inst) || + assert((isVectorLikeInstWithConstOps(SD->Inst) || SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) && "scheduler and vectorizer bundle mismatch"); SD->FirstInBundle->SchedulingPriority = Idx++; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll index 9e3a886..02b88fc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -6,16 +6,8 @@ define void @fextr(i16* %ptr) { ; CHECK-LABEL: @fextr( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <8 x i16>, <8 x i16>* undef, align 16 -; CHECK-NEXT: [[V0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: -; CHECK-NEXT: [[V1:%.*]] = extractelement <8 x i16> [[LD]], i32 1 -; 
CHECK-NEXT: [[V2:%.*]] = extractelement <8 x i16> [[LD]], i32 2 -; CHECK-NEXT: [[V3:%.*]] = extractelement <8 x i16> [[LD]], i32 3 -; CHECK-NEXT: [[V4:%.*]] = extractelement <8 x i16> [[LD]], i32 4 -; CHECK-NEXT: [[V5:%.*]] = extractelement <8 x i16> [[LD]], i32 5 -; CHECK-NEXT: [[V6:%.*]] = extractelement <8 x i16> [[LD]], i32 6 -; CHECK-NEXT: [[V7:%.*]] = extractelement <8 x i16> [[LD]], i32 7 ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1 ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2 @@ -24,18 +16,12 @@ define void @fextr(i16* %ptr) { ; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 ; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 ; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[V0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[V1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[V2]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[V3]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[V4]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[V5]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[V6]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[V7]], i32 7 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i16> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP8]], <8 x i16>* [[TMP9]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], 
i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer @@ -43,7 +29,7 @@ define void @fextr(i16* %ptr) { ; YAML-NEXT: Function: fextr ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-4' +; YAML-NEXT: - Cost: '-20' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '4' -- 2.7.4