QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
- const bool NoSync = NoSyncAA.isAssumedNoSync();
+ bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();
// Helper to determine if we need to consider threading, which we cannot
// right now. However, if the function is (assumed) nosync or the thread
// executing all instructions is the main thread only we can ignore
// threading.
auto CanIgnoreThreading = [&](const Instruction &I) -> bool {
- if (NoSync)
- return true;
if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I))
return true;
return false;
};
// Helper to determine if the access is executed by the same thread as the
- // load, for now it is sufficient to avoid any potential threading effects
- // as we cannot deal with them anyway.
- auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool {
- return CanIgnoreThreading(*Acc.getLocalInst());
+ // given instruction, for now it is sufficient to avoid any potential
+ // threading effects as we cannot deal with them anyway.
+ auto IsSameThreadAsInst = [&](const Access &Acc) -> bool {
+ return AllInSameNoSyncFn || CanIgnoreThreading(*Acc.getLocalInst());
};
// TODO: Use inter-procedural reachability and dominance.
if (FindInterferingWrites && Dominates)
HasBeenWrittenTo = true;
+ // Track if all interesting accesses are in the same `nosync` function as
+ // the given instruction.
+ AllInSameNoSyncFn &= Acc.getRemoteInst()->getFunction() == &Scope;
+
// For now we only filter accesses based on CFG reasoning which does not
// work yet if we have threading effects, or the access is complicated.
if (CanUseCFGResoning && Dominates && UseDominanceReasoning &&
- IsSameThreadAsLoad(Acc))
+ IsSameThreadAsInst(Acc))
DominatingWrites.insert(&Acc);
InterferingAccesses.push_back({&Acc, Exact});
// the worst case quadratic as we are looking for another write that will
// hide the effect of this one.
auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
+ if (!IsSameThreadAsInst(Acc))
+ return false;
if ((!Acc.isWriteOrAssumption() ||
!AA::isPotentiallyReachable(A, *Acc.getRemoteInst(), I, QueryingAA,
&ExclusionSet, IsLiveInCalleeCB)) &&
if (!DT || !UseDominanceReasoning)
return false;
- if (!IsSameThreadAsLoad(Acc))
- return false;
if (!DominatingWrites.count(&Acc))
return false;
for (const Access *DomAcc : DominatingWrites) {
// succeeded for all or not.
unsigned NumInterferingAccesses = InterferingAccesses.size();
for (auto &It : InterferingAccesses) {
- if (!CanUseCFGResoning || NumInterferingAccesses > MaxInterferingAccesses ||
+ if (!AllInSameNoSyncFn ||
+ NumInterferingAccesses > MaxInterferingAccesses ||
!CanSkipAccess(*It.first, It.second)) {
if (!UserCB(*It.first, It.second))
return false;
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
;; This function returns its second argument on all return statements
; TUNIT-LABEL: define {{[^@]+}}@visible
; TUNIT-SAME: (i32* noalias nocapture nofree readonly [[A:%.*]], i32* noalias nocapture nofree readonly align 4 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR4:[0-9]+]]
-; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR4]]
+; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR3:[0-9]+]]
+; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR3]]
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]]
; TUNIT-NEXT: ret i32 [[ADD]]
;
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
-; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]]
+; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]]
; TUNIT-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[CALL]]
; TUNIT-NEXT: ret i32 [[ADD2]]
;
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
; TUNIT-NEXT: store i32 5, i32* [[B]], align 4
-; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]]
-; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]]
+; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]]
+; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]]
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]]
; TUNIT-NEXT: ret i32 [[ADD]]
;
}
define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 {
-; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable
+; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
; TUNIT-LABEL: define {{[^@]+}}@noalias_args_argmem_rn
-; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR3:[0-9]+]] {
-; TUNIT-NEXT: ret i32 undef
+; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4
+; TUNIT-NEXT: ret i32 [[T0]]
;
; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem_rn
; TUNIT-LABEL: define {{[^@]+}}@visible_local_3
; TUNIT-SAME: () #[[ATTR2]] {
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
-; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR5:[0-9]+]]
-; TUNIT-NEXT: ret i32 5
+; TUNIT-NEXT: store i32 5, i32* [[B]], align 4
+; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR4:[0-9]+]]
+; TUNIT-NEXT: ret i32 [[CALL]]
;
; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none)
; CGSCC-LABEL: define {{[^@]+}}@visible_local_3
; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable }
; TUNIT: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable }
; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) }
-; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable }
-; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind }
-; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn }
+; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind }
+; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn }
;.
; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable }
; CGSCC: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable }
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
@Gstatic_int1 = internal global i32 zeroinitializer, align 4
; TUNIT-LABEL: define {{[^@]+}}@assume_3_nr
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
+; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; TUNIT-LABEL: define {{[^@]+}}@assume_4_nr
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; TUNIT-LABEL: define {{[^@]+}}@assume_5_nr
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
+; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; TUNIT-LABEL: define {{[^@]+}}@assume_5c_nr
; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
-; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR6]]
+; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
+; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; TUNIT-LABEL: define {{[^@]+}}@assume_3
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
+; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; TUNIT-LABEL: define {{[^@]+}}@assume_4
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; TUNIT-LABEL: define {{[^@]+}}@assume_5
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
+; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; TUNIT-LABEL: define {{[^@]+}}@assume_5c
; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
-; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR6]]
+; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
+; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
target triple = "amdgcn-amd-amdhsa"
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
@ReachableKernel = internal addrspace(3) global i32 3, align 4
@UnreachableKernel = internal addrspace(3) global i32 42, align 4
@ReachableKernelAS0 = internal global i32 7, align 4
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
-; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5:[0-9]+]]
+; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
+; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5:[0-9]+]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: nosync nounwind
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
-; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5]]
+; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
+; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: nosync nounwind
}
define internal void @level2Kernelall_late() {
-; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@level2Kernelall_late
-; TUNIT-SAME: () #[[ATTR2]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: ret void
-;
-; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@level2Kernelall_late
-; CGSCC-SAME: () #[[ATTR2]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
-; CGSCC-NEXT: ret void
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@level2Kernelall_late
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
+; CHECK-NEXT: ret void
;
entry:
store i32 1, i32 *addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0
; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; TUNIT: if.then:
-; TUNIT-NEXT: call void @level2a() #[[ATTR3]]
+; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[LOCAL]], align 4
+; TUNIT-NEXT: call void @level2a(i32 [[TMP0]]) #[[ATTR3]]
; TUNIT-NEXT: br label [[IF_END:%.*]]
; TUNIT: if.else:
-; TUNIT-NEXT: call void @level2b() #[[ATTR3]]
+; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOCAL]], align 4
+; TUNIT-NEXT: call void @level2b(i32 [[TMP1]]) #[[ATTR3]]
; TUNIT-NEXT: br label [[IF_END]]
; TUNIT: if.end:
; TUNIT-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]]
; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
+; TUNIT-NEXT: store i32 17, i32* [[ADDR]], align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
define internal void @level2a(i32* %addr) {
; TUNIT: Function Attrs: norecurse nosync nounwind
; TUNIT-LABEL: define {{[^@]+}}@level2a
-; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
-; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]]
+; TUNIT-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4
+; TUNIT-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
+; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
+; TUNIT-NEXT: [[QQQQ2:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[QQQQ2]]) #[[ATTR5]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: nosync nounwind
define internal void @level2b(i32* %addr) {
; TUNIT: Function Attrs: norecurse nosync nounwind
; TUNIT-LABEL: define {{[^@]+}}@level2b
-; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
-; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]]
+; TUNIT-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4
+; TUNIT-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
+; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
+; TUNIT-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[TMP3]]) #[[ATTR5]]
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: nosync nounwind
}
define internal void @level2all_late(i32* %addr) {
-; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@level2all_late
-; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
-; TUNIT-NEXT: entry:
-; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; TUNIT-NEXT: ret void
-;
-; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@level2all_late
-; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
-; CGSCC-NEXT: entry:
-; CGSCC-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; CGSCC-NEXT: store i32 5, i32* [[ADDR]], align 4
-; CGSCC-NEXT: ret void
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@level2all_late
+; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
+; CHECK-NEXT: store i32 5, i32* [[ADDR]], align 4
+; CHECK-NEXT: ret void
;
entry:
store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
@GInt1 = internal global i32 undef, align 4
; TUNIT-NEXT: call void @usei32(i32 [[USE2]])
; TUNIT-NEXT: br label [[T]]
; TUNIT: m:
-; TUNIT-NEXT: call void @usei32(i32 42)
+; TUNIT-NEXT: [[USE3:%.*]] = load i32, ptr [[P]], align 4
+; TUNIT-NEXT: call void @usei32(i32 [[USE3]])
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: nosync
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+
+target triple = "amdgcn-amd-amdhsa"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+@G = internal addrspace(3) global i32 undef, align 4
+
+; Make sure we do not delete the stores to @G without also replacing the load with `1`.
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+;.
+define void @kernel() "kernel" {
+; TUNIT: Function Attrs: norecurse
+; TUNIT-LABEL: define {{[^@]+}}@kernel
+; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
+; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
+; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; TUNIT: if.then:
+; TUNIT-NEXT: store i32 1, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT: br label [[IF_MERGE:%.*]]
+; TUNIT: if.else:
+; TUNIT-NEXT: call void @barrier() #[[ATTR4:[0-9]+]]
+; TUNIT-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT: call void @use1(i32 [[L]]) #[[ATTR4]]
+; TUNIT-NEXT: br label [[IF_MERGE]]
+; TUNIT: if.merge:
+; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; TUNIT: if.then2:
+; TUNIT-NEXT: store i32 2, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT: call void @barrier() #[[ATTR4]]
+; TUNIT-NEXT: br label [[IF_END]]
+; TUNIT: if.end:
+; TUNIT-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+; TUNIT-NEXT: ret void
+;
+; CGSCC: Function Attrs: norecurse
+; CGSCC-LABEL: define {{[^@]+}}@kernel
+; CGSCC-SAME: () #[[ATTR0:[0-9]+]] {
+; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
+; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CGSCC: if.then:
+; CGSCC-NEXT: store i32 1, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT: br label [[IF_MERGE:%.*]]
+; CGSCC: if.else:
+; CGSCC-NEXT: call void @barrier()
+; CGSCC-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT: call void @use1(i32 [[L]])
+; CGSCC-NEXT: br label [[IF_MERGE]]
+; CGSCC: if.merge:
+; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; CGSCC: if.then2:
+; CGSCC-NEXT: store i32 2, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT: call void @barrier()
+; CGSCC-NEXT: br label [[IF_END]]
+; CGSCC: if.end:
+; CGSCC-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+; CGSCC-NEXT: ret void
+;
+ %call = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+ %cmp = icmp eq i32 %call, -1
+ br i1 %cmp, label %if.then, label %if.else
+if.then:
+ store i32 1, i32 addrspace(3)* @G
+ br label %if.merge
+if.else:
+ call void @barrier();
+ %l = load i32, i32 addrspace(3)* @G
+ call void @use1(i32 %l)
+ br label %if.merge
+if.merge:
+ br i1 %cmp, label %if.then2, label %if.end
+if.then2:
+ store i32 2, i32 addrspace(3)* @G
+ call void @barrier();
+ br label %if.end
+if.end:
+ call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+ ret void
+}
+
+declare void @barrier() norecurse nounwind nocallback
+declare void @use1(i32) nosync norecurse nounwind nocallback
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) nocallback
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback
+
+!llvm.module.flags = !{!0, !1}
+!nvvm.annotations = !{!2}
+
+!0 = !{i32 7, !"openmp", i32 50}
+!1 = !{i32 7, !"openmp-device", i32 50}
+!2 = !{void ()* @kernel, !"kernel", i32 1}
+
+;.
+; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
+; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
+; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
+; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback }
+; TUNIT: attributes #[[ATTR4]] = { nounwind }
+;.
+; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
+; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
+; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
+; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
+;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}