/// Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
+ /// Type to keep track of the status of the dependence check. The order of
+ /// the elements is important and has to be from most permissive to least
+ /// permissive.
+ enum class VectorizationSafetyStatus {
+ // Can vectorize safely without RT checks. All dependences are known to be
+ // safe.
+ Safe,
+ // Cannot vectorize due to unsafe or unknown dependencies.
+ Unsafe,
+ };
+
/// Dependece between memory access instructions.
struct Dependence {
/// The type of the dependence.
Instruction *getDestination(const LoopAccessInfo &LAI) const;
/// Dependence types that don't prevent vectorization.
- static bool isSafeForVectorization(DepType Type);
+ static VectorizationSafetyStatus isSafeForVectorization(DepType Type);
/// Lexically forward dependence.
bool isForward() const;
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
: PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U),
- ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
- RecordDependences(true) {}
+ ShouldRetryWithRuntimeCheck(false),
+ Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {}
/// Register the location (instructions are given increasing numbers)
/// of a write access.
/// No memory dependence was encountered that would inhibit
/// vectorization.
- bool isSafeForVectorization() const { return SafeForVectorization; }
+ bool isSafeForVectorization() const {
+ return Status == VectorizationSafetyStatus::Safe;
+ }
/// The maximum number of bytes of a vector register we can vectorize
/// the accesses safely with.
/// vectorize this loop with runtime checks.
bool ShouldRetryWithRuntimeCheck;
- /// No memory dependence was encountered that would inhibit
- /// vectorization.
- bool SafeForVectorization;
+ /// Result of the dependence checks, indicating whether the checked
+ /// dependences are safe for vectorization or not.
+ VectorizationSafetyStatus Status;
//// True if Dependences reflects the dependences in the
//// loop. If false we exceeded MaxDependences and
/// \return false if we shouldn't vectorize at all or avoid larger
/// vectorization factors by limiting MaxSafeDepDistBytes.
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
+
+ /// Updates the current safety status with \p S. We can go from Safe to
+ /// to Unsafe.
+ void mergeInStatus(VectorizationSafetyStatus S);
};
/// Holds information about the memory runtime legality checks to verify
return X == PtrSCEVB;
}
-bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+MemoryDepChecker::VectorizationSafetyStatus
+MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
switch (Type) {
case NoDep:
case Forward:
case BackwardVectorizable:
- return true;
+ return VectorizationSafetyStatus::Safe;
case Unknown:
case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
- return false;
+ return VectorizationSafetyStatus::Unsafe;
}
llvm_unreachable("unexpected DepType!");
}
return false;
}
+void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
+ if (Status < S)
+ Status = S;
+}
+
/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
Dependence::DepType Type =
isDependent(*A.first, A.second, *B.first, B.second, Strides);
- SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+ mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
// dependences. In that case return as soon as we find the first
<< "Too many dependences, stopped recording\n");
}
}
- if (!RecordDependences && !SafeForVectorization)
+ if (!RecordDependences && !isSafeForVectorization())
return false;
}
++OI;
}
LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
- return SafeForVectorization;
+ return isSafeForVectorization();
}
SmallVector<Instruction *, 4>
ret void
}
+; Check we do generate unnecessary runtime checks. They will always fail.
+
+; void test_runtime_check2(float *a, float b, unsigned offset, unsigned offset2, unsigned n, float *c) {
+; for (unsigned i = 1; i < n; i++) {
+; a[i+o1] += a[i+o2] + b;
+; c[i] = c[i-1] + b;
+; }
+; }
+;
+; CHECK-LABEL: test_runtime_check2
+; CHECK: <4 x float>
+define void @test_runtime_check2(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n, float* %c) {
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %ind.sum = add i64 %iv, %offset
+ %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
+ %l1 = load float, float* %arr.idx, align 4
+ %ind.sum2 = add i64 %iv, %offset2
+ %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
+ %l2 = load float, float* %arr.idx2, align 4
+ %m = fmul fast float %b, %l2
+ %ad = fadd fast float %l1, %m
+ store float %ad, float* %arr.idx, align 4
+ %c.ind = add i64 %iv, -1
+ %c.idx = getelementptr inbounds float, float* %c, i64 %c.ind
+ %lc = load float, float* %c.idx, align 4
+ %vc = fadd float %lc, 1.0
+ %c.idx2 = getelementptr inbounds float, float* %c, i64 %iv
+ store float %vc, float* %c.idx2
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %n
+ br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+ ret void
+}
+
; CHECK: !9 = !DILocation(line: 101, column: 1, scope: !{{.*}})
!llvm.module.flags = !{!0, !1}