From b3ced9852c7e6cc2dab61b6adb5c92812c99b00e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 12 Mar 2021 07:39:53 -0800 Subject: [PATCH] [SLP]Fix crash on extending scheduling region. If SLP vectorizer tries to extend the scheduling region and runs out of the budget too early, but still extends the region to the new ending instructions (i.e., it was able to extend the region for the first instruction in the bundle, but not for the second), the compiler need to recalculate dependecies in full, just like if the extending was successfull. Without it, the schedule data chunks may end up with the wrong number of (unscheduled) dependecies and it may end up with the incorrect function, where the vectorized instruction does not dominate on the extractelement instruction. Differential Revision: https://reviews.llvm.org/D98531 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 80 +++++++++-------- .../SLPVectorizer/X86/crash_exceed_scheduling.ll | 100 +++++++++++++++++++++ 2 files changed, 144 insertions(+), 36 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7c10abe..0ec8027 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5179,11 +5179,53 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, bool ReSchedule = false; LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.OpValue << "\n"); + auto &&TryScheduleBundle = [this, OldScheduleEnd, SLP](bool ReSchedule, + ScheduleData *Bundle) { + // The scheduling region got new instructions at the lower end (or it is a + // new region for the first bundle). This makes it necessary to + // recalculate all dependencies. + // It is seldom that this needs to be done a second time after adding the + // initial bundle to the region. + if (ScheduleEnd != OldScheduleEnd) { + for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) + doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); }); + ReSchedule = true; + } + if (ReSchedule) { + resetSchedule(); + initialFillReadyList(ReadyInsts); + } + if (Bundle) { + LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle + << " in block " << BB->getName() << "\n"); + calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP); + } + + // Now try to schedule the new bundle or (if no bundle) just calculate + // dependencies. As soon as the bundle is "ready" it means that there are no + // cyclic dependencies and we can schedule it. Note that's important that we + // don't "schedule" the bundle yet (see cancelScheduling). + while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) && + !ReadyInsts.empty()) { + ScheduleData *Picked = ReadyInsts.pop_back_val(); + if (Picked->isSchedulingEntity() && Picked->isReady()) + schedule(Picked, ReadyInsts); + } + }; + // Make sure that the scheduling region contains all // instructions of the bundle. for (Value *V : VL) { - if (!extendSchedulingRegion(V, S)) + if (!extendSchedulingRegion(V, S)) { + // If the scheduling region got new instructions at the lower end (or it + // is a new region for the first bundle). This makes it necessary to + // recalculate all dependencies. + // Otherwise the compiler may crash trying to incorrectly calculate + // dependencies and emit instruction in the wrong order at the actual + // scheduling. + TryScheduleBundle(/*ReSchedule=*/false, nullptr); return None; + } } for (Value *V : VL) { @@ -5212,42 +5254,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, BundleMember->FirstInBundle = Bundle; PrevInBundle = BundleMember; } - if (ScheduleEnd != OldScheduleEnd) { - // The scheduling region got new instructions at the lower end (or it is a - // new region for the first bundle). This makes it necessary to - // recalculate all dependencies. - // It is seldom that this needs to be done a second time after adding the - // initial bundle to the region. - for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { - doForAllOpcodes(I, [](ScheduleData *SD) { - SD->clearDependencies(); - }); - } - ReSchedule = true; - } - if (ReSchedule) { - resetSchedule(); - initialFillReadyList(ReadyInsts); - } assert(Bundle && "Failed to find schedule bundle"); - - LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " - << BB->getName() << "\n"); - - calculateDependencies(Bundle, true, SLP); - - // Now try to schedule the new bundle. As soon as the bundle is "ready" it - // means that there are no cyclic dependencies and we can schedule it. - // Note that's important that we don't "schedule" the bundle yet (see - // cancelScheduling). - while (!Bundle->isReady() && !ReadyInsts.empty()) { - - ScheduleData *pickedSD = ReadyInsts.pop_back_val(); - - if (pickedSD->isSchedulingEntity() && pickedSD->isReady()) { - schedule(pickedSD, ReadyInsts); - } - } + TryScheduleBundle(ReSchedule, Bundle); if (!Bundle->isReady()) { cancelScheduling(VL, S.OpValue); return None; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll new file mode 100644 index 0000000..299c2d3 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -slp-vectorizer -slp-min-tree-size=2 -slp-threshold=-1000 -slp-max-look-ahead-depth=1 -slp-look-ahead-users-budget=1 -slp-schedule-budget=27 -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define void @exceed(double %0, double %1) { +; CHECK-LABEL: @exceed( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef +; CHECK-NEXT: [[IXX0:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX1:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX2:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX3:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX4:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX5:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef +; CHECK-NEXT: [[IXX10:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX11:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX12:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX13:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX14:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX15:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX20:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX21:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double undef, i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = fmul fast <2 x double> [[TMP14]], [[TMP15]] +; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB2:%.*]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: br label [[LABEL:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x double> [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP16]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP19]], i32 1 +; CHECK-NEXT: br label [[LABEL]] +; CHECK: label: +; CHECK-NEXT: [[TMP21:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP20]], [[BB2]] ] +; CHECK-NEXT: ret void +; +entry: + %i10 = fdiv fast double %0, %1 + %ix = fmul double %i10, undef + %ixx0 = fsub double undef, undef + %ixx1 = fsub double undef, undef + %ixx2 = fsub double undef, undef + %ixx3 = fsub double undef, undef + %ixx4 = fsub double undef, undef + %ixx5 = fsub double undef, undef + %ix1 = fmul double %i10, undef + %ixx10 = fsub double undef, undef + %ixx11 = fsub double undef, undef + %ixx12 = fsub double undef, undef + %ixx13 = fsub double undef, undef + %ixx14 = fsub double undef, undef + %ixx15 = fsub double undef, undef + %ixx20 = fsub double undef, undef + %ixx21 = fsub double undef, undef + %ixx22 = fsub double undef, undef + %i11 = fdiv fast double %0, %1 + %ix2 = fmul double %i11, %i11 + %tmp1 = fadd fast double %i11, %0 + %tmp2 = fadd fast double %0, %1 + %tmp5 = fmul fast double %tmp1, %tmp2 + %tmp15 = fadd fast double %i10, %1 + %tmp25 = fadd fast double %0, %1 + %tmp6 = fmul fast double %tmp15, %tmp25 + %tmp555 = fmul fast double %i10, undef + %ixx101 = fsub double undef, undef + %tmp666 = fmul fast double %1, undef + switch i32 undef, label %bb1 [ + i32 0, label %bb2 + ] + +bb1: ; preds = %entry + br label %label + +bb2: ; preds = %entry + br label %label + +label: ; preds = %bb2, %bb1 + %phi1 = phi double [ %tmp5, %bb1 ], [ %tmp555, %bb2 ] + %phi2 = phi double [ %tmp6, %bb1 ], [ %tmp666, %bb2 ] + ret void +} -- 2.7.4