auto *In = TE->getMainOp();
assert(In &&
(isa<ExtractValueInst>(In) || isa<ExtractElementInst>(In) ||
- isa<InsertElementInst>(In) ||
In->getNumOperands() == TE->getNumOperands()) &&
"Missed TreeEntry operands?");
(void)In; // fake use to avoid build failure when assertions disabled
Optional<BoUpSLP::ScheduleData *>
BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
- if (isa<PHINode>(S.OpValue))
+ if (isa<PHINode>(S.OpValue) || isa<InsertElementInst>(S.OpValue))
return nullptr;
// Initialize the instruction bundle.
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
Value *OpValue) {
- if (isa<PHINode>(OpValue))
+ if (isa<PHINode>(OpValue) || isa<InsertElementInst>(OpValue))
return;
ScheduleData *Bundle = getScheduleData(OpValue);
return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
- assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
+ assert(!isa<PHINode>(I) && !isa<InsertElementInst>(I) &&
+ "phi nodes/insertelements don't need to be scheduled");
auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool {
ScheduleData *ISD = getScheduleData(I);
if (!ISD)
for (User *U : BundleMember->Inst->users()) {
if (isa<Instruction>(U)) {
ScheduleData *UseSD = getScheduleData(U);
- if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle) &&
- // Ignore inner deps for insertelement
- !(UseSD->FirstInBundle == SD &&
- isa<InsertElementInst>(BundleMember->Inst))) {
+ if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
if (!DestBundle->IsScheduled)
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
I = I->getNextNode()) {
BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
- assert(SD->isPartOfBundle() ==
- (getTreeEntry(SD->Inst) != nullptr) &&
+ assert((isa<InsertElementInst>(SD->Inst) ||
+ SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) &&
"scheduler and vectorizer bundle mismatch");
SD->FirstInBundle->SchedulingPriority = Idx++;
if (SD->isSchedulingEntity()) {
; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0
; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0
; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
-; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]]
+; MINTREESIZE-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
+; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP3]], i32 1
+; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
+; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP2]], i32 1
+; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
+; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP11]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0
; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0
; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
-; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]]
+; MINTREESIZE-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
+; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP3]], i32 1
+; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
+; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP2]], i32 1
+; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
+; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP11]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0