From 3b052558125cbedf18c2ddb65780b50d6f437d54 Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Fri, 16 Dec 2022 17:56:38 -0800 Subject: [PATCH] Revert "[OpenMP][FIX] Restrict more unsound assmptions about threading" This reverts commit 07c375348083170e39c9498a42a9679c7e08f07f. Reason: This change is dependent on a commit that needs to be rolled back because it broke the ASan buildbot. See https://reviews.llvm.org/rGfc21f2d7bae2e0be630470cc7ca9323ed5859892 for more information. --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 25 ++--- .../Attributor/IPConstantProp/return-argument.ll | 2 +- .../test/Transforms/Attributor/internal-noalias.ll | 27 +++-- .../Transforms/Attributor/value-simplify-assume.ll | 24 +---- .../Transforms/Attributor/value-simplify-gpu.ll | 77 +++++++------- .../Attributor/value-simplify-reachability.ll | 5 +- .../Transforms/OpenMP/value-simplify-openmp-opt.ll | 115 --------------------- 7 files changed, 69 insertions(+), 206 deletions(-) delete mode 100644 llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 67edb99..6c2d56f 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1076,23 +1076,25 @@ struct AAPointerInfoImpl QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); const auto *ExecDomainAA = A.lookupAAFor( IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL); - bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync(); + const bool NoSync = NoSyncAA.isAssumedNoSync(); // Helper to determine if we need to consider threading, which we cannot // right now. However, if the function is (assumed) nosync or the thread // executing all instructions is the main thread only we can ignore // threading. auto CanIgnoreThreading = [&](const Instruction &I) -> bool { + if (NoSync) + return true; if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I)) return true; return false; }; // Helper to determine if the access is executed by the same thread as the - // given instruction, for now it is sufficient to avoid any potential - // threading effects as we cannot deal with them anyway. - auto IsSameThreadAsInst = [&](const Access &Acc) -> bool { - return AllInSameNoSyncFn || CanIgnoreThreading(*Acc.getLocalInst()); + // load, for now it is sufficient to avoid any potential threading effects + // as we cannot deal with them anyway. + auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool { + return CanIgnoreThreading(*Acc.getLocalInst()); }; // TODO: Use inter-procedural reachability and dominance. @@ -1178,14 +1180,10 @@ struct AAPointerInfoImpl if (FindInterferingWrites && Dominates) HasBeenWrittenTo = true; - // Track if all interesting accesses are in the same `nosync` function as - // the given instruction. - AllInSameNoSyncFn &= Acc.getRemoteInst()->getFunction() == &Scope; - // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. if (CanUseCFGResoning && Dominates && UseDominanceReasoning && - IsSameThreadAsInst(Acc)) + IsSameThreadAsLoad(Acc)) DominatingWrites.insert(&Acc); InterferingAccesses.push_back({&Acc, Exact}); @@ -1198,8 +1196,6 @@ struct AAPointerInfoImpl // the worst case quadratic as we are looking for another write that will // hide the effect of this one. auto CanSkipAccess = [&](const Access &Acc, bool Exact) { - if (!IsSameThreadAsInst(Acc)) - return false; if ((!Acc.isWriteOrAssumption() || !AA::isPotentiallyReachable(A, *Acc.getRemoteInst(), I, QueryingAA, &ExclusionSet, IsLiveInCalleeCB)) && @@ -1210,6 +1206,8 @@ struct AAPointerInfoImpl if (!DT || !UseDominanceReasoning) return false; + if (!IsSameThreadAsLoad(Acc)) + return false; if (!DominatingWrites.count(&Acc)) return false; for (const Access *DomAcc : DominatingWrites) { @@ -1229,8 +1227,7 @@ struct AAPointerInfoImpl // succeeded for all or not. unsigned NumInterferingAccesses = InterferingAccesses.size(); for (auto &It : InterferingAccesses) { - if (!AllInSameNoSyncFn || - NumInterferingAccesses > MaxInterferingAccesses || + if (!CanUseCFGResoning || NumInterferingAccesses > MaxInterferingAccesses || !CanSkipAccess(*It.first, It.second)) { if (!UserCB(*It.first, It.second)) return false; diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll index 3536ee0..0347cc3 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC ;; This function returns its second argument on all return statements diff --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll index 74608c9..2355660 100644 --- a/llvm/test/Transforms/Attributor/internal-noalias.ll +++ b/llvm/test/Transforms/Attributor/internal-noalias.ll @@ -7,8 +7,8 @@ define dso_local i32 @visible(i32* noalias %A, i32* noalias %B) #0 { ; TUNIT-LABEL: define {{[^@]+}}@visible ; TUNIT-SAME: (i32* noalias nocapture nofree readonly [[A:%.*]], i32* noalias nocapture nofree readonly align 4 [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR3:[0-9]+]] -; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR3]] +; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR4]] ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; TUNIT-NEXT: ret i32 [[ADD]] ; @@ -36,7 +36,7 @@ define private i32 @noalias_args(i32* %A, i32* %B) #0 { ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] -; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]] ; TUNIT-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[CALL]] ; TUNIT-NEXT: ret i32 [[ADD2]] ; @@ -94,8 +94,8 @@ define dso_local i32 @visible_local(i32* %A) #0 { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: store i32 5, i32* [[B]], align 4 -; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]] -; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]] +; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]] +; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]] ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; TUNIT-NEXT: ret i32 [[ADD]] ; @@ -158,11 +158,10 @@ define i32 @visible_local_2() { } define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@noalias_args_argmem_rn -; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 -; TUNIT-NEXT: ret i32 [[T0]] +; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR3:[0-9]+]] { +; TUNIT-NEXT: ret i32 undef ; ; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem_rn @@ -181,9 +180,8 @@ define i32 @visible_local_3() { ; TUNIT-LABEL: define {{[^@]+}}@visible_local_3 ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 -; TUNIT-NEXT: store i32 5, i32* [[B]], align 4 -; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR4:[0-9]+]] -; TUNIT-NEXT: ret i32 [[CALL]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR5:[0-9]+]] +; TUNIT-NEXT: ret i32 5 ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@visible_local_3 @@ -205,8 +203,9 @@ attributes #1 = { argmemonly noinline nounwind uwtable willreturn} ; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } ; TUNIT: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable } ; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } -; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind } -; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable } +; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind } +; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn } ;. ; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable } ; CGSCC: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } diff --git a/llvm/test/Transforms/Attributor/value-simplify-assume.ll b/llvm/test/Transforms/Attributor/value-simplify-assume.ll index 211c043e..b9f6f27 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-assume.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-assume.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC @Gstatic_int1 = internal global i32 zeroinitializer, align 4 @@ -422,9 +422,6 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-LABEL: define {{[^@]+}}@assume_3_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 -; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -475,7 +472,6 @@ define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-LABEL: define {{[^@]+}}@assume_4_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 -; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -528,9 +524,6 @@ define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-LABEL: define {{[^@]+}}@assume_5_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 -; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -599,9 +592,7 @@ define i1 @assume_5c_nr(i1 %cond) norecurse { ; TUNIT-LABEL: define {{[^@]+}}@assume_5c_nr ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 -; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 -; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -1046,9 +1037,6 @@ define i1 @assume_3(i1 %arg, i1 %cond) { ; TUNIT-LABEL: define {{[^@]+}}@assume_3 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 -; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -1099,7 +1087,6 @@ define i1 @assume_4(i1 %arg, i1 %cond) { ; TUNIT-LABEL: define {{[^@]+}}@assume_4 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 -; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -1152,9 +1139,6 @@ define i1 @assume_5(i1 %arg, i1 %cond) { ; TUNIT-LABEL: define {{[^@]+}}@assume_5 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 -; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -1223,9 +1207,7 @@ define i1 @assume_5c(i1 %cond) { ; TUNIT-LABEL: define {{[^@]+}}@assume_5c ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 -; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 -; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll index 6e6113e..7ae99e4 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll @@ -1,10 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC target triple = "amdgcn-amd-amdhsa" -%struct.ident_t = type { i32, i32, i32, i32, i8* } @ReachableKernel = internal addrspace(3) global i32 3, align 4 @UnreachableKernel = internal addrspace(3) global i32 42, align 4 @ReachableKernelAS0 = internal global i32 7, align 4 @@ -111,8 +110,7 @@ define internal void @level2Kernela() { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5:[0-9]+]] +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5:[0-9]+]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -140,8 +138,7 @@ define internal void @level2Kernelb() { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5]] +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -163,12 +160,18 @@ entry: } define internal void @level2Kernelall_late() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) -; CHECK-LABEL: define {{[^@]+}}@level2Kernelall_late -; CHECK-SAME: () #[[ATTR2]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) +; TUNIT-LABEL: define {{[^@]+}}@level2Kernelall_late +; TUNIT-SAME: () #[[ATTR2]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) +; CGSCC-LABEL: define {{[^@]+}}@level2Kernelall_late +; CGSCC-SAME: () #[[ATTR2]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 +; CGSCC-NEXT: ret void ; entry: store i32 1, i32 *addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 @@ -208,12 +211,10 @@ define internal void @level1(i32 %C) { ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TUNIT: if.then: -; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[LOCAL]], align 4 -; TUNIT-NEXT: call void @level2a(i32 [[TMP0]]) #[[ATTR3]] +; TUNIT-NEXT: call void @level2a() #[[ATTR3]] ; TUNIT-NEXT: br label [[IF_END:%.*]] ; TUNIT: if.else: -; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOCAL]], align 4 -; TUNIT-NEXT: call void @level2b(i32 [[TMP1]]) #[[ATTR3]] +; TUNIT-NEXT: call void @level2b() #[[ATTR3]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: ; TUNIT-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]] @@ -262,7 +263,6 @@ define internal void @level2all_early(i32* %addr) { ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; TUNIT-NEXT: store i32 17, i32* [[ADDR]], align 4 ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) @@ -282,14 +282,11 @@ entry: define internal void @level2a(i32* %addr) { ; TUNIT: Function Attrs: norecurse nosync nounwind ; TUNIT-LABEL: define {{[^@]+}}@level2a -; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] { +; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4 -; TUNIT-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4 -; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; TUNIT-NEXT: [[QQQQ2:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[QQQQ2]]) #[[ATTR5]] +; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -313,14 +310,11 @@ entry: define internal void @level2b(i32* %addr) { ; TUNIT: Function Attrs: norecurse nosync nounwind ; TUNIT-LABEL: define {{[^@]+}}@level2b -; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] { +; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4 -; TUNIT-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4 -; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; TUNIT-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[TMP3]]) #[[ATTR5]] +; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -342,13 +336,20 @@ entry: } define internal void @level2all_late(i32* %addr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) -; CHECK-LABEL: define {{[^@]+}}@level2all_late -; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; CHECK-NEXT: store i32 5, i32* [[ADDR]], align 4 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) +; TUNIT-LABEL: define {{[^@]+}}@level2all_late +; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) +; CGSCC-LABEL: define {{[^@]+}}@level2all_late +; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; CGSCC-NEXT: store i32 5, i32* [[ADDR]], align 4 +; CGSCC-NEXT: ret void ; entry: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 diff --git a/llvm/test/Transforms/Attributor/value-simplify-reachability.ll b/llvm/test/Transforms/Attributor/value-simplify-reachability.ll index ec1baf4..e135351 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-reachability.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-reachability.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC @GInt1 = internal global i32 undef, align 4 @@ -711,8 +711,7 @@ define internal void @exclusion_set3_helper(i1 %c, ptr %p) { ; TUNIT-NEXT: call void @usei32(i32 [[USE2]]) ; TUNIT-NEXT: br label [[T]] ; TUNIT: m: -; TUNIT-NEXT: [[USE3:%.*]] = load i32, ptr [[P]], align 4 -; TUNIT-NEXT: call void @usei32(i32 [[USE3]]) +; TUNIT-NEXT: call void @usei32(i32 42) ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll deleted file mode 100644 index add432d..0000000 --- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll +++ /dev/null @@ -1,115 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT -; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC - -target triple = "amdgcn-amd-amdhsa" - -%struct.ident_t = type { i32, i32, i32, i32, i8* } - -@G = internal addrspace(3) global i32 undef, align 4 - -; Make sure we do not delete the stores to @G without also replacing the load with `1`. -;. -; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 -;. -define void @kernel() "kernel" { -; TUNIT: Function Attrs: norecurse -; TUNIT-LABEL: define {{[^@]+}}@kernel -; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { -; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false) -; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 -; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; TUNIT: if.then: -; TUNIT-NEXT: store i32 1, i32 addrspace(3)* @G, align 4 -; TUNIT-NEXT: br label [[IF_MERGE:%.*]] -; TUNIT: if.else: -; TUNIT-NEXT: call void @barrier() #[[ATTR4:[0-9]+]] -; TUNIT-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4 -; TUNIT-NEXT: call void @use1(i32 [[L]]) #[[ATTR4]] -; TUNIT-NEXT: br label [[IF_MERGE]] -; TUNIT: if.merge: -; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]] -; TUNIT: if.then2: -; TUNIT-NEXT: store i32 2, i32 addrspace(3)* @G, align 4 -; TUNIT-NEXT: call void @barrier() #[[ATTR4]] -; TUNIT-NEXT: br label [[IF_END]] -; TUNIT: if.end: -; TUNIT-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1) -; TUNIT-NEXT: ret void -; -; CGSCC: Function Attrs: norecurse -; CGSCC-LABEL: define {{[^@]+}}@kernel -; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false) -; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 -; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CGSCC: if.then: -; CGSCC-NEXT: store i32 1, i32 addrspace(3)* @G, align 4 -; CGSCC-NEXT: br label [[IF_MERGE:%.*]] -; CGSCC: if.else: -; CGSCC-NEXT: call void @barrier() -; CGSCC-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4 -; CGSCC-NEXT: call void @use1(i32 [[L]]) -; CGSCC-NEXT: br label [[IF_MERGE]] -; CGSCC: if.merge: -; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]] -; CGSCC: if.then2: -; CGSCC-NEXT: store i32 2, i32 addrspace(3)* @G, align 4 -; CGSCC-NEXT: call void @barrier() -; CGSCC-NEXT: br label [[IF_END]] -; CGSCC: if.end: -; CGSCC-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1) -; CGSCC-NEXT: ret void -; - %call = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false) - %cmp = icmp eq i32 %call, -1 - br i1 %cmp, label %if.then, label %if.else -if.then: - store i32 1, i32 addrspace(3)* @G - br label %if.merge -if.else: - call void @barrier(); - %l = load i32, i32 addrspace(3)* @G - call void @use1(i32 %l) - br label %if.merge -if.merge: - br i1 %cmp, label %if.then2, label %if.end -if.then2: - store i32 2, i32 addrspace(3)* @G - call void @barrier(); - br label %if.end -if.end: - call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1) - ret void -} - -declare void @barrier() norecurse nounwind nocallback -declare void @use1(i32) nosync norecurse nounwind nocallback -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) nocallback -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback - -!llvm.module.flags = !{!0, !1} -!nvvm.annotations = !{!2} - -!0 = !{i32 7, !"openmp", i32 50} -!1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{void ()* @kernel, !"kernel", i32 1} - -;. -; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind } -; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind } -; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback } -; TUNIT: attributes #[[ATTR4]] = { nounwind } -;. -; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" } -; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind } -; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback } -;. -; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1} -;. -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} -- 2.7.4