}
bool LoopVectorizationLegality::canVectorize() {
+ // Store the result and return it at the end instead of exiting early, in case
+ // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
+ bool Result = true;
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!TheLoop->getLoopPreheader()) {
ORE->emit(createMissedAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by vectorizer");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// FIXME: The code is currently dead, since the loop gets sent to
if (!TheLoop->empty()) {
ORE->emit(createMissedAnalysis("NotInnermostLoop")
<< "loop is not the innermost loop");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
ORE->emit(createMissedAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by vectorizer");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
ORE->emit(createMissedAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by vectorizer");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// We only handle bottom-tested loops, i.e. loop in which the condition is
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
ORE->emit(createMissedAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by vectorizer");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// We need to have a loop header.
unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
- return false;
- }
-
- // ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = PSE.getBackedgeTakenCount();
- if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
- ORE->emit(createMissedAnalysis("CantComputeNumberOfIterations")
- << "could not determine number of loop iterations");
- DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// Check if we can vectorize the instructions and CFG in this loop.
if (!canVectorizeInstrs()) {
DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// Go over each instruction and look at memory deps.
if (!canVectorizeMemory()) {
DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
DEBUG(dbgs() << "LV: We can vectorize this loop"
<< "Too many SCEV assumptions need to be made and checked "
<< "at runtime");
DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
- // Okay! We can vectorize. At this point we don't have any other mem analysis
+ // Okay! We've done all the tests. If any have failed, return false. Otherwise
+ // we can vectorize, and at this point we don't have any other mem analysis
// which may limit our maximum vectorization factor, so just return true with
// no restrictions.
- return true;
+ return Result;
}
static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
; break;
; }
; }
+; File, line, and column should match those specified in the metadata
+; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:4:5: loop not vectorized
; void test_disabled(int *A, int Length) {
; #pragma clang loop vectorize(disable) interleave(disable)
; for (int i = 0; i < Length; i++)
; A[i] = i;
; }
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
; void test_array_bounds(int *A, int *B, int Length) {
; #pragma clang loop vectorize(enable)
; for (int i = 0; i < Length; i++)
; A[i] = A[B[i]];
; }
-
-; File, line, and column should match those specified in the metadata
-; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
-; CHECK: remark: source.cpp:4:5: loop not vectorized
-; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
; CHECK: remark: source.cpp:19:5: loop not vectorized
; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
-; CHECK: _Z4testPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z13test_disabledPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z17test_array_boundsPiS_i
-; CHECK-NOT: x i32>
-; CHECK: ret
+; int foo();
+; int test_multiple_failures(int *A) {
+; int k = 0;
+; #pragma clang loop vectorize(enable) interleave(enable)
+; for (int i = 0; i < 1000; i+=A[i]) {
+; if (A[i])
+; k = foo();
+; }
+; return k;
+; }
+; CHECK: remark: source.cpp:29:7: loop not vectorized: control flow cannot be substituted for a select
+; CHECK: remark: source.cpp:27:3: loop not vectorized
; YAML: --- !Analysis
; YAML-NEXT: Pass: loop-vectorize
; YAML-NEXT: - String: 'loop not vectorized: '
; YAML-NEXT: - String: failed explicitly specified loop vectorization
; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: NoCFGForSelect
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 29, Column: 7 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: control flow cannot be substituted for a select
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: NonReductionValueUsedOutsideLoop
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: value that could not be identified as reduction is used outside the loop
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: CantComputeNumberOfIterations
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: could not determine number of loop iterations
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Missed
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: MissedDetails
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: loop not vectorized
+; YAML-NEXT: ...
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
ret void, !dbg !24
}
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 {
entry:
ret void, !dbg !31
}
+; CHECK: _Z13test_disabledPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 {
entry:
ret void, !dbg !36
}
+; CHECK: _Z17test_array_boundsPiS_i
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; Function Attrs: nounwind uwtable
+; IR for the test_multiple_failures source shown in the comments earlier in
+; this file. The single loop (for.body -> [if.then] -> for.inc) combines
+; several properties:
+;   - %call comes from a call to @foo that only executes in the conditional
+;     if.then block;
+;   - %k.1 is carried around the loop through the %k.09 phi and is returned
+;     from for.cond.cleanup, i.e. it is used outside the loop;
+;   - the induction value %i.09 is advanced by %1, which is either a value
+;     loaded from %A (%.pre) or the constant 0 when if.then is skipped, so the
+;     step is not a compile-time constant.
+; NOTE(review): these presumably correspond to the NoCFGForSelect,
+; NonReductionValueUsedOutsideLoop and CantComputeNumberOfIterations remarks
+; expected for this function -- confirm against the vectorizer's output.
+define i32 @test_multiple_failures(i32* nocapture readonly %A) #0 !dbg !46 {
+entry:
+ br label %for.body, !dbg !38
+
+for.body: ; preds = %entry, %for.inc
+ %i.09 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+ %k.09 = phi i32 [ 0, %entry ], [ %k.1, %for.inc ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09, !dbg !40
+ %0 = load i32, i32* %arrayidx, align 4, !dbg !40
+ %tobool = icmp eq i32 %0, 0, !dbg !40
+ br i1 %tobool, label %for.inc, label %if.then, !dbg !40
+
+if.then: ; preds = %for.body
+ %call = tail call i32 (...) @foo(), !dbg !41
+ %.pre = load i32, i32* %arrayidx, align 4
+ br label %for.inc, !dbg !42
+
+for.inc: ; preds = %for.body, %if.then
+ %1 = phi i32 [ %.pre, %if.then ], [ 0, %for.body ], !dbg !43
+ %k.1 = phi i32 [ %call, %if.then ], [ %k.09, %for.body ]
+ %add = add nsw i32 %1, %i.09, !dbg !44
+ %cmp = icmp slt i32 %add, 1000, !dbg !45
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !38
+
+for.cond.cleanup: ; preds = %for.inc
+ ret i32 %k.1, !dbg !39
+}
+
+declare i32 @foo(...)
+
+; CHECK: test_multiple_failure
+; CHECK-NOT: x i32>
+; CHECK: ret
+
attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
!34 = !{!34, !15}
!35 = !DILocation(line: 19, column: 5, scope: !33)
!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, variables: !2)