"Maximum number of iterations we keep dismantling potential values."),
cl::init(64));
-static cl::opt<unsigned> MaxInterferingAccesses(
- "attributor-max-interfering-accesses", cl::Hidden,
- cl::desc("Maximum number of interfering accesses to "
- "check before assuming all might interfere."),
- cl::init(6));
-
STATISTIC(NumAAs, "Number of abstract attributes created");
// Some helper macros to deal with statistics tracking.
const bool FindInterferingReads = I.mayWriteToMemory();
const bool UseDominanceReasoning =
FindInterferingWrites && NoRecurseAA.isKnownNoRecurse();
- const bool CanUseCFGResoning = CanIgnoreThreading(I);
const DominatorTree *DT =
InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(Scope);
(!FindInterferingReads || !Acc.isRead()))
return true;
- bool Dominates = DT && Exact && Acc.isMustAccess() &&
- (Acc.getLocalInst()->getFunction() == &Scope) &&
+ bool Dominates = FindInterferingWrites && DT && Exact &&
+ Acc.isMustAccess() &&
+ (Acc.getRemoteInst()->getFunction() == &Scope) &&
DT->dominates(Acc.getRemoteInst(), &I);
- if (FindInterferingWrites && Dominates)
- HasBeenWrittenTo = true;
+ if (Dominates)
+ DominatingWrites.insert(&Acc);
// Track if all interesting accesses are in the same `nosync` function as
// the given instruction.
AllInSameNoSyncFn &= Acc.getRemoteInst()->getFunction() == &Scope;
- // For now we only filter accesses based on CFG reasoning which does not
- // work yet if we have threading effects, or the access is complicated.
- if (CanUseCFGResoning && Dominates && UseDominanceReasoning &&
- IsSameThreadAsInst(Acc))
- DominatingWrites.insert(&Acc);
-
InterferingAccesses.push_back({&Acc, Exact});
return true;
};
if (!State::forallInterferingAccesses(I, AccessCB, Range))
return false;
- // Helper to determine if we can skip a specific write access. This is in
- // the worst case quadratic as we are looking for another write that will
- // hide the effect of this one.
+ HasBeenWrittenTo = !DominatingWrites.empty();
+
+ // Dominating writes form a chain, find the least/lowest member.
+ Instruction *LeastDominatingWriteInst = nullptr;
+ for (const Access *Acc : DominatingWrites) {
+ if (!LeastDominatingWriteInst) {
+ LeastDominatingWriteInst = Acc->getRemoteInst();
+ } else if (DT->dominates(LeastDominatingWriteInst,
+ Acc->getRemoteInst())) {
+ LeastDominatingWriteInst = Acc->getRemoteInst();
+ }
+ }
+
+ // Helper to determine if we can skip a specific write access.
auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
if (!IsSameThreadAsInst(Acc))
return false;
- if ((!Acc.isWriteOrAssumption() ||
- !AA::isPotentiallyReachable(A, *Acc.getRemoteInst(), I, QueryingAA,
- &ExclusionSet, IsLiveInCalleeCB)) &&
- (!Acc.isRead() ||
- !AA::isPotentiallyReachable(A, I, *Acc.getRemoteInst(), QueryingAA,
- &ExclusionSet, IsLiveInCalleeCB)))
+
+ // Check read (RAW) dependences and write (WAR) dependences as necessary.
+ // If we successfully excluded all effects we are interested in, the
+ // access can be skipped.
+ bool ReadChecked = !FindInterferingReads;
+ bool WriteChecked = !FindInterferingWrites;
+
+ // If the instruction cannot reach the access, the former does not
+ // interfere with what the access reads.
+ if (!ReadChecked) {
+ if (!AA::isPotentiallyReachable(A, I, *Acc.getRemoteInst(), QueryingAA,
+ &ExclusionSet, IsLiveInCalleeCB))
+ ReadChecked = true;
+ }
+  // If the instruction cannot be reached from the access, the latter does not
+ // interfere with what the instruction reads.
+ if (!WriteChecked) {
+ if (!AA::isPotentiallyReachable(A, *Acc.getRemoteInst(), I, QueryingAA,
+ &ExclusionSet, IsLiveInCalleeCB))
+ WriteChecked = true;
+ }
+
+ // If we still might be affected by the write of the access but there are
+ // dominating writes in the function of the instruction
+ // (HasBeenWrittenTo), we can try to reason that the access is overwritten
+  // by them. This would have happened above if they are all in the same
+  // function, so we only check the inter-procedural case. Effectively, we
+  // want to show that there is no call after the dominating write that might
+ // reach the access, and when it returns reach the instruction with the
+ // updated value. To this end, we iterate all call sites, check if they
+ // might reach the instruction without going through another access
+ // (ExclusionSet) and at the same time might reach the access. However,
+ // that is all part of AAInterFnReachability.
+ if (!WriteChecked && HasBeenWrittenTo &&
+ Acc.getRemoteInst()->getFunction() != &Scope) {
+
+ const auto &FnReachabilityAA = A.getAAFor<AAInterFnReachability>(
+ QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
+
+  // Without going backwards in the call tree, can we reach the access
+  // from the least dominating write? Do not allow passing the instruction
+  // itself either.
+ bool Inserted = ExclusionSet.insert(&I).second;
+
+ if (!FnReachabilityAA.instructionCanReach(
+ A, *LeastDominatingWriteInst,
+ *Acc.getRemoteInst()->getFunction(), &ExclusionSet))
+ WriteChecked = true;
+
+ if (Inserted)
+ ExclusionSet.erase(&I);
+ }
+
+ if (ReadChecked && WriteChecked)
return true;
if (!DT || !UseDominanceReasoning)
return false;
if (!DominatingWrites.count(&Acc))
return false;
- for (const Access *DomAcc : DominatingWrites) {
- assert(Acc.getLocalInst()->getFunction() ==
- DomAcc->getLocalInst()->getFunction() &&
- "Expected dominating writes to be in the same function!");
-
- if (DomAcc != &Acc &&
- DT->dominates(Acc.getLocalInst(), DomAcc->getLocalInst())) {
- return true;
- }
- }
- return false;
+ return LeastDominatingWriteInst != Acc.getLocalInst();
};
- // Run the user callback on all accesses we cannot skip and return if that
- // succeeded for all or not.
- unsigned NumInterferingAccesses = InterferingAccesses.size();
+ // Run the user callback on all accesses we cannot skip and return if
+ // that succeeded for all or not.
for (auto &It : InterferingAccesses) {
if ((!AllInSameNoSyncFn && !IsThreadLocalObj) ||
- NumInterferingAccesses > MaxInterferingAccesses ||
!CanSkipAccess(*It.first, It.second)) {
if (!UserCB(*It.first, It.second))
return false;
; TUNIT-LABEL: define {{[^@]+}}@unions
; TUNIT-SAME: () #[[ATTR0]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; TUNIT-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
-; TUNIT-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i64 0, i32 1
-; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
-; TUNIT-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
; TUNIT-NEXT: [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; TUNIT-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8
-; TUNIT-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i64 0, i32 1
-; TUNIT-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8
+; TUNIT-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8
+; TUNIT-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i64 0, i32 1
+; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8
+; TUNIT-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
+; TUNIT-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; TUNIT-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
+; TUNIT-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i64 0, i32 1
+; TUNIT-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
; TUNIT-NEXT: [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR0]]
; TUNIT-NEXT: ret i32 [[RESULT]]
;
; TUNIT-SAME: () #[[ATTR2]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: store i1 true, ptr [[STACK]], align 1
-; TUNIT-NEXT: [[L4:%.*]] = load i1, ptr [[STACK]], align 1
-; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR6]]
+; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR6]]
; TUNIT-NEXT: call void @useI1p(ptr noalias nocapture noundef nonnull dereferenceable(1) [[STACK]])
-; TUNIT-NEXT: ret i1 [[L4]]
+; TUNIT-NEXT: ret i1 true
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@keep_assume_4c_nr
; CGSCC-SAME: () #[[ATTR2]] {
; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1
; CGSCC-NEXT: store i1 true, ptr [[STACK]], align 1
-; CGSCC-NEXT: [[L4:%.*]] = load i1, ptr [[STACK]], align 1
-; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR7]]
+; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR7]]
; CGSCC-NEXT: call void @useI1p(ptr noalias nocapture noundef nonnull dereferenceable(1) [[STACK]])
-; CGSCC-NEXT: ret i1 [[L4]]
+; CGSCC-NEXT: ret i1 true
;
%stack = alloca i1
store i1 true, ptr %stack
;
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@keep_assume_4_nr
-; TUNIT-SAME: (i1 [[ARG:%.*]]) #[[ATTR2]] {
+; TUNIT-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR2]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: store i1 [[ARG]], ptr [[STACK]], align 1
-; TUNIT-NEXT: [[L:%.*]] = load i1, ptr [[STACK]], align 1
-; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]]
+; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR6]]
; TUNIT-NEXT: call void @useI1p(ptr noalias nocapture noundef nonnull dereferenceable(1) [[STACK]])
-; TUNIT-NEXT: ret i1 [[L]]
+; TUNIT-NEXT: ret i1 [[ARG]]
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@keep_assume_4_nr
-; CGSCC-SAME: (i1 [[ARG:%.*]]) #[[ATTR2]] {
+; CGSCC-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR2]] {
; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1
; CGSCC-NEXT: store i1 [[ARG]], ptr [[STACK]], align 1
-; CGSCC-NEXT: [[L:%.*]] = load i1, ptr [[STACK]], align 1
-; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]]
+; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR7]]
; CGSCC-NEXT: call void @useI1p(ptr noalias nocapture noundef nonnull dereferenceable(1) [[STACK]])
-; CGSCC-NEXT: ret i1 [[L]]
+; CGSCC-NEXT: ret i1 [[ARG]]
;
%stack = alloca i1
store i1 %arg, ptr %stack
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+
+declare void @unknown() nocallback
+
+define i32 @many_writes_nosycn(i1 %c0, i1 %c1, i1 %c2) nosync {
+; CHECK: Function Attrs: norecurse nosync
+; CHECK-LABEL: define {{[^@]+}}@many_writes_nosycn
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[P:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
+; CHECK: t0:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[M1:%.*]]
+; CHECK: f0:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: br i1 [[C2]], label [[F1:%.*]], label [[M1]]
+; CHECK: t1:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: store i32 7, ptr [[P]], align 4
+; CHECK-NEXT: br label [[M2:%.*]]
+; CHECK: f1:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: store i32 9, ptr [[P]], align 4
+; CHECK-NEXT: br label [[M2]]
+; CHECK: m1:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: store i32 11, ptr [[P]], align 4
+; CHECK-NEXT: br label [[M2]]
+; CHECK: m2:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: ret i32 [[L]]
+;
+ %p = alloca i32
+ store i32 0, ptr %p
+ call void @unknown()
+ store i32 1, ptr %p
+ br i1 %c0, label %t0, label %f0
+t0:
+ store i32 2, ptr %p
+ call void @unknown()
+ store i32 3, ptr %p
+ br i1 %c1, label %t1, label %m1
+f0:
+ store i32 4, ptr %p
+ call void @unknown()
+ store i32 5, ptr %p
+ br i1 %c2, label %f1, label %m1
+t1:
+ store i32 6, ptr %p
+ call void @unknown()
+ store i32 7, ptr %p
+ br label %m2
+f1:
+ store i32 8, ptr %p
+ call void @unknown()
+ store i32 9, ptr %p
+ br label %m2
+m1:
+ store i32 10, ptr %p
+ call void @unknown()
+ store i32 11, ptr %p
+ br label %m2
+m2:
+ call void @unknown()
+ %l = load i32, ptr %p
+ ret i32 %l
+}
+
+define i32 @many_writes(i1 %c0, i1 %c1, i1 %c2) {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define {{[^@]+}}@many_writes
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[P:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
+; CHECK: t0:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[M1:%.*]]
+; CHECK: f0:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: br i1 [[C2]], label [[F1:%.*]], label [[M1]]
+; CHECK: t1:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: store i32 7, ptr [[P]], align 4
+; CHECK-NEXT: br label [[M2:%.*]]
+; CHECK: f1:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: store i32 9, ptr [[P]], align 4
+; CHECK-NEXT: br label [[M2]]
+; CHECK: m1:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: store i32 11, ptr [[P]], align 4
+; CHECK-NEXT: br label [[M2]]
+; CHECK: m2:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: ret i32 [[L]]
+;
+ %p = alloca i32
+ store i32 0, ptr %p
+ call void @unknown()
+ store i32 1, ptr %p
+ br i1 %c0, label %t0, label %f0
+t0:
+ store i32 2, ptr %p
+ call void @unknown()
+ store i32 3, ptr %p
+ br i1 %c1, label %t1, label %m1
+f0:
+ store i32 4, ptr %p
+ call void @unknown()
+ store i32 5, ptr %p
+ br i1 %c2, label %f1, label %m1
+t1:
+ store i32 6, ptr %p
+ call void @unknown()
+ store i32 7, ptr %p
+ br label %m2
+f1:
+ store i32 8, ptr %p
+ call void @unknown()
+ store i32 9, ptr %p
+ br label %m2
+m1:
+ store i32 10, ptr %p
+ call void @unknown()
+ store i32 11, ptr %p
+ br label %m2
+m2:
+ call void @unknown()
+ %l = load i32, ptr %p
+ ret i32 %l
+}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback }
+; CHECK: attributes #[[ATTR1]] = { norecurse nosync }
+; CHECK: attributes #[[ATTR2]] = { norecurse }
+;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CGSCC: {{.*}}
+; TUNIT: {{.*}}
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
;
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
; TUNIT-NEXT: store i32 77, ptr [[P]], align 4
; TUNIT-NEXT: call void @exclusion_set3_helper(i1 noundef true, ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P]]) #[[ATTR8]]
; TUNIT-NEXT: [[USE2:%.*]] = load i32, ptr [[P]], align 4
-; TUNIT-NEXT: call void @usei32(i32 [[USE2]])
+; TUNIT-NEXT: call void @usei32(i32 noundef [[USE2]])
; TUNIT-NEXT: br label [[T]]
; TUNIT: m:
-; TUNIT-NEXT: call void @usei32(i32 42)
+; TUNIT-NEXT: call void @usei32(i32 noundef 42)
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: nosync