From 4e0f464ce2d995d9ee8466729dd5f8c2239f3bab Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 17 Dec 2022 15:10:51 -0800 Subject: [PATCH] Reapply "[OpenMP][FIX] Restrict more unsound assmptions about threading" This reverts commit 3b052558125cbedf18c2ddb65780b50d6f437d54. This patch got reverted due to an unrelated memory leak that has been fixed. --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 25 +- .../Transforms/Attributor/value-simplify-assume.ll | 2 +- .../Transforms/Attributor/value-simplify-gpu.ll | 77 +- .../Attributor/value-simplify-reachability.ll | 834 +++++++++++++++++++++ .../Transforms/OpenMP/value-simplify-openmp-opt.ll | 115 +++ 5 files changed, 1002 insertions(+), 51 deletions(-) create mode 100644 llvm/test/Transforms/Attributor/value-simplify-reachability.ll create mode 100644 llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 937f257..fb0e0a0 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1077,25 +1077,23 @@ struct AAPointerInfoImpl QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); const auto *ExecDomainAA = A.lookupAAFor( IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL); - const bool NoSync = NoSyncAA.isAssumedNoSync(); + bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync(); // Helper to determine if we need to consider threading, which we cannot // right now. However, if the function is (assumed) nosync or the thread // executing all instructions is the main thread only we can ignore // threading. auto CanIgnoreThreading = [&](const Instruction &I) -> bool { - if (NoSync) - return true; if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I)) return true; return false; }; // Helper to determine if the access is executed by the same thread as the - // load, for now it is sufficient to avoid any potential threading effects - // as we cannot deal with them anyway. - auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool { - return CanIgnoreThreading(*Acc.getLocalInst()); + // given instruction, for now it is sufficient to avoid any potential + // threading effects as we cannot deal with them anyway. + auto IsSameThreadAsInst = [&](const Access &Acc) -> bool { + return AllInSameNoSyncFn || CanIgnoreThreading(*Acc.getLocalInst()); }; // TODO: Use inter-procedural reachability and dominance. @@ -1172,10 +1170,14 @@ struct AAPointerInfoImpl if (FindInterferingWrites && Dominates) HasBeenWrittenTo = true; + // Track if all interesting accesses are in the same `nosync` function as + // the given instruction. + AllInSameNoSyncFn &= Acc.getRemoteInst()->getFunction() == &Scope; + // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. if (CanUseCFGResoning && Dominates && UseDominanceReasoning && - IsSameThreadAsLoad(Acc)) + IsSameThreadAsInst(Acc)) DominatingWrites.insert(&Acc); InterferingAccesses.push_back({&Acc, Exact}); @@ -1188,6 +1190,8 @@ struct AAPointerInfoImpl // the worst case quadratic as we are looking for another write that will // hide the effect of this one. auto CanSkipAccess = [&](const Access &Acc, bool Exact) { + if (!IsSameThreadAsInst(Acc)) + return false; if ((!Acc.isWriteOrAssumption() || !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, IsLiveInCalleeCB)) && @@ -1198,8 +1202,6 @@ struct AAPointerInfoImpl if (!DT || !UseDominanceReasoning) return false; - if (!IsSameThreadAsLoad(Acc)) - return false; if (!DominatingWrites.count(&Acc)) return false; for (const Access *DomAcc : DominatingWrites) { @@ -1219,7 +1221,8 @@ struct AAPointerInfoImpl // succeeded for all or not. unsigned NumInterferingAccesses = InterferingAccesses.size(); for (auto &It : InterferingAccesses) { - if (!CanUseCFGResoning || NumInterferingAccesses > MaxInterferingAccesses || + if (!AllInSameNoSyncFn || + NumInterferingAccesses > MaxInterferingAccesses || !CanSkipAccess(*It.first, It.second)) { if (!UserCB(*It.first, It.second)) return false; diff --git a/llvm/test/Transforms/Attributor/value-simplify-assume.ll b/llvm/test/Transforms/Attributor/value-simplify-assume.ll index aed4441..24eb89c 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-assume.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-assume.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC @Gstatic_int1 = internal global i32 zeroinitializer, align 4 diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll index 7ae99e4..6e6113e 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll @@ -1,9 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC target triple = "amdgcn-amd-amdhsa" +%struct.ident_t = type { i32, i32, i32, i32, i8* } @ReachableKernel = internal addrspace(3) global i32 3, align 4 @UnreachableKernel = internal addrspace(3) global i32 42, align 4 @ReachableKernelAS0 = internal global i32 7, align 4 @@ -110,7 +111,8 @@ define internal void @level2Kernela() { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5:[0-9]+]] +; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5:[0-9]+]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -138,7 +140,8 @@ define internal void @level2Kernelb() { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5]] +; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -160,18 +163,12 @@ entry: } define internal void @level2Kernelall_late() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@level2Kernelall_late -; TUNIT-SAME: () #[[ATTR2]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: ret void -; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@level2Kernelall_late -; CGSCC-SAME: () #[[ATTR2]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; CGSCC-NEXT: ret void +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) +; CHECK-LABEL: define {{[^@]+}}@level2Kernelall_late +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 +; CHECK-NEXT: ret void ; entry: store i32 1, i32 *addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 @@ -211,10 +208,12 @@ define internal void @level1(i32 %C) { ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TUNIT: if.then: -; TUNIT-NEXT: call void @level2a() #[[ATTR3]] +; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[LOCAL]], align 4 +; TUNIT-NEXT: call void @level2a(i32 [[TMP0]]) #[[ATTR3]] ; TUNIT-NEXT: br label [[IF_END:%.*]] ; TUNIT: if.else: -; TUNIT-NEXT: call void @level2b() #[[ATTR3]] +; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOCAL]], align 4 +; TUNIT-NEXT: call void @level2b(i32 [[TMP1]]) #[[ATTR3]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: ; TUNIT-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]] @@ -263,6 +262,7 @@ define internal void @level2all_early(i32* %addr) { ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; TUNIT-NEXT: store i32 17, i32* [[ADDR]], align 4 ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) @@ -282,11 +282,14 @@ entry: define internal void @level2a(i32* %addr) { ; TUNIT: Function Attrs: norecurse nosync nounwind ; TUNIT-LABEL: define {{[^@]+}}@level2a -; TUNIT-SAME: () #[[ATTR1]] { +; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]] +; TUNIT-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4 +; TUNIT-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4 +; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; TUNIT-NEXT: [[QQQQ2:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 +; TUNIT-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[QQQQ2]]) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -310,11 +313,14 @@ entry: define internal void @level2b(i32* %addr) { ; TUNIT: Function Attrs: norecurse nosync nounwind ; TUNIT-LABEL: define {{[^@]+}}@level2b -; TUNIT-SAME: () #[[ATTR1]] { +; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]] +; TUNIT-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4 +; TUNIT-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4 +; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; TUNIT-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 +; TUNIT-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[TMP3]]) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -336,20 +342,13 @@ entry: } define internal void @level2all_late(i32* %addr) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@level2all_late -; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; TUNIT-NEXT: ret void -; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@level2all_late -; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; CGSCC-NEXT: store i32 5, i32* [[ADDR]], align 4 -; CGSCC-NEXT: ret void +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) +; CHECK-LABEL: define {{[^@]+}}@level2all_late +; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; CHECK-NEXT: store i32 5, i32* [[ADDR]], align 4 +; CHECK-NEXT: ret void ; entry: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 diff --git a/llvm/test/Transforms/Attributor/value-simplify-reachability.ll b/llvm/test/Transforms/Attributor/value-simplify-reachability.ll new file mode 100644 index 0000000..b505ac0 --- /dev/null +++ b/llvm/test/Transforms/Attributor/value-simplify-reachability.ll @@ -0,0 +1,834 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC + +@GInt1 = internal global i32 undef, align 4 +@GInt2 = internal global i32 zeroinitializer, align 4 +@GInt3 = internal global i32 undef, align 4 +@GInt4 = internal global i32 zeroinitializer, align 4 +@GInt5 = internal global i32 undef, align 4 + +declare void @llvm.assume(i1) +declare void @useI32(i32) nosync nocallback +declare void @free(ptr) allockind("free") "alloc-family"="malloc" +declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0, 1) "alloc-family"="malloc" + +;. +; CHECK: @[[GINT1:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 undef, align 4 +; CHECK: @[[GINT2:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0, align 4 +; CHECK: @[[GINT3:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 undef, align 4 +; CHECK: @[[GINT4:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0, align 4 +; CHECK: @[[GINT5:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 undef, align 4 +;. +define internal void @write1ToGInt1() { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) +; CHECK-LABEL: define {{[^@]+}}@write1ToGInt1 +; CHECK-SAME: () #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: store i32 1, ptr @GInt1, align 4 +; CHECK-NEXT: ret void +; + store i32 1, ptr @GInt1 + ret void +} + +define internal void @write1ToGInt2() { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) +; CHECK-LABEL: define {{[^@]+}}@write1ToGInt2 +; CHECK-SAME: () #[[ATTR4]] { +; CHECK-NEXT: store i32 1, ptr @GInt2, align 4 +; CHECK-NEXT: ret void +; + store i32 1, ptr @GInt2 + ret void +} + +define void @entry1(i1 %c, i32 %v) { +; TUNIT: Function Attrs: norecurse nosync +; TUNIT-LABEL: define {{[^@]+}}@entry1 +; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5:[0-9]+]] { +; TUNIT-NEXT: [[L0:%.*]] = load i32, ptr @GInt1, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L0]]) +; TUNIT-NEXT: call void @write1ToGInt1() #[[ATTR10:[0-9]+]] +; TUNIT-NEXT: [[L1:%.*]] = load i32, ptr @GInt1, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L1]]) +; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; TUNIT: T: +; TUNIT-NEXT: store i32 [[V]], ptr @GInt1, align 4 +; TUNIT-NEXT: [[L2:%.*]] = load i32, ptr @GInt1, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L2]]) +; TUNIT-NEXT: br label [[F]] +; TUNIT: F: +; TUNIT-NEXT: [[L3:%.*]] = load i32, ptr @GInt1, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L3]]) +; TUNIT-NEXT: call void @write1ToGInt1() #[[ATTR10]] +; TUNIT-NEXT: [[L4:%.*]] = load i32, ptr @GInt1, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L4]]) +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nosync +; CGSCC-LABEL: define {{[^@]+}}@entry1 +; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5:[0-9]+]] { +; CGSCC-NEXT: [[L0:%.*]] = load i32, ptr @GInt1, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L0]]) +; CGSCC-NEXT: call void @write1ToGInt1() #[[ATTR10:[0-9]+]] +; CGSCC-NEXT: [[L1:%.*]] = load i32, ptr @GInt1, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L1]]) +; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CGSCC: T: +; CGSCC-NEXT: store i32 [[V]], ptr @GInt1, align 4 +; CGSCC-NEXT: [[L2:%.*]] = load i32, ptr @GInt1, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L2]]) +; CGSCC-NEXT: br label [[F]] +; CGSCC: F: +; CGSCC-NEXT: [[L3:%.*]] = load i32, ptr @GInt1, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L3]]) +; CGSCC-NEXT: call void @write1ToGInt1() #[[ATTR10]] +; CGSCC-NEXT: [[L4:%.*]] = load i32, ptr @GInt1, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L4]]) +; CGSCC-NEXT: ret void +; + %l0 = load i32, ptr @GInt1 + call void @useI32(i32 %l0) + call void @write1ToGInt1(); + %l1 = load i32, ptr @GInt1 + call void @useI32(i32 %l1) + br i1 %c, label %T, label %F +T: + store i32 %v, ptr @GInt1 + %l2 = load i32, ptr @GInt1 + call void @useI32(i32 %l2) + br label %F +F: + %l3 = load i32, ptr @GInt1 + call void @useI32(i32 %l3) + call void @write1ToGInt1(); + %l4 = load i32, ptr @GInt1 + call void @useI32(i32 %l4) + ret void +} + +define void @entry2(i1 %c, i32 %v) { +; TUNIT: Function Attrs: norecurse nosync +; TUNIT-LABEL: define {{[^@]+}}@entry2 +; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] { +; TUNIT-NEXT: [[L0:%.*]] = load i32, ptr @GInt2, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L0]]) +; TUNIT-NEXT: call void @write1ToGInt2() #[[ATTR10]] +; TUNIT-NEXT: [[L1:%.*]] = load i32, ptr @GInt2, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L1]]) +; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; TUNIT: T: +; TUNIT-NEXT: store i32 [[V]], ptr @GInt2, align 4 +; TUNIT-NEXT: [[L2:%.*]] = load i32, ptr @GInt2, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L2]]) +; TUNIT-NEXT: br label [[F]] +; TUNIT: F: +; TUNIT-NEXT: [[L3:%.*]] = load i32, ptr @GInt2, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L3]]) +; TUNIT-NEXT: call void @write1ToGInt2() #[[ATTR10]] +; TUNIT-NEXT: [[L4:%.*]] = load i32, ptr @GInt2, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L4]]) +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nosync +; CGSCC-LABEL: define {{[^@]+}}@entry2 +; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] { +; CGSCC-NEXT: [[L0:%.*]] = load i32, ptr @GInt2, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L0]]) +; CGSCC-NEXT: call void @write1ToGInt2() #[[ATTR10]] +; CGSCC-NEXT: [[L1:%.*]] = load i32, ptr @GInt2, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L1]]) +; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CGSCC: T: +; CGSCC-NEXT: store i32 [[V]], ptr @GInt2, align 4 +; CGSCC-NEXT: [[L2:%.*]] = load i32, ptr @GInt2, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L2]]) +; CGSCC-NEXT: br label [[F]] +; CGSCC: F: +; CGSCC-NEXT: [[L3:%.*]] = load i32, ptr @GInt2, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L3]]) +; CGSCC-NEXT: call void @write1ToGInt2() #[[ATTR10]] +; CGSCC-NEXT: [[L4:%.*]] = load i32, ptr @GInt2, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L4]]) +; CGSCC-NEXT: ret void +; + %l0 = load i32, ptr @GInt2 + call void @useI32(i32 %l0) + call void @write1ToGInt2(); + %l1 = load i32, ptr @GInt2 + call void @useI32(i32 %l1) + br i1 %c, label %T, label %F +T: + store i32 %v, ptr @GInt2 + %l2 = load i32, ptr @GInt2 + call void @useI32(i32 %l2) + br label %F +F: + %l3 = load i32, ptr @GInt2 + call void @useI32(i32 %l3) + call void @write1ToGInt2(); + %l4 = load i32, ptr @GInt2 + call void @useI32(i32 %l4) + ret void +} +define void @entry3(i1 %c, i32 %v) { +; TUNIT: Function Attrs: norecurse nosync +; TUNIT-LABEL: define {{[^@]+}}@entry3 +; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] { +; TUNIT-NEXT: [[L0:%.*]] = load i32, ptr @GInt3, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L0]]) +; TUNIT-NEXT: store i32 1, ptr @GInt3, align 4 +; TUNIT-NEXT: [[L1:%.*]] = load i32, ptr @GInt3, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L1]]) +; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; TUNIT: T: +; TUNIT-NEXT: store i32 [[V]], ptr @GInt3, align 4 +; TUNIT-NEXT: [[L2:%.*]] = load i32, ptr @GInt3, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L2]]) +; TUNIT-NEXT: br label [[F]] +; TUNIT: F: +; TUNIT-NEXT: [[L3:%.*]] = load i32, ptr @GInt3, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L3]]) +; TUNIT-NEXT: store i32 1, ptr @GInt3, align 4 +; TUNIT-NEXT: [[L4:%.*]] = load i32, ptr @GInt3, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L4]]) +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: norecurse nosync +; CGSCC-LABEL: define {{[^@]+}}@entry3 +; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR6:[0-9]+]] { +; CGSCC-NEXT: [[L0:%.*]] = load i32, ptr @GInt3, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L0]]) +; CGSCC-NEXT: store i32 1, ptr @GInt3, align 4 +; CGSCC-NEXT: [[L1:%.*]] = load i32, ptr @GInt3, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L1]]) +; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CGSCC: T: +; CGSCC-NEXT: store i32 [[V]], ptr @GInt3, align 4 +; CGSCC-NEXT: [[L2:%.*]] = load i32, ptr @GInt3, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L2]]) +; CGSCC-NEXT: br label [[F]] +; CGSCC: F: +; CGSCC-NEXT: [[L3:%.*]] = load i32, ptr @GInt3, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L3]]) +; CGSCC-NEXT: store i32 1, ptr @GInt3, align 4 +; CGSCC-NEXT: [[L4:%.*]] = load i32, ptr @GInt3, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L4]]) +; CGSCC-NEXT: ret void +; + %l0 = load i32, ptr @GInt3 + call void @useI32(i32 %l0) + store i32 1, ptr @GInt3 + %l1 = load i32, ptr @GInt3 + call void @useI32(i32 %l1) + br i1 %c, label %T, label %F +T: + store i32 %v, ptr @GInt3 + %l2 = load i32, ptr @GInt3 + call void @useI32(i32 %l2) + br label %F +F: + %l3 = load i32, ptr @GInt3 + call void @useI32(i32 %l3) + store i32 1, ptr @GInt3 + %l4 = load i32, ptr @GInt3 + call void @useI32(i32 %l4) + ret void +} + +define void @entry4(i1 %c, i32 %v) { +; TUNIT: Function Attrs: norecurse nosync +; TUNIT-LABEL: define {{[^@]+}}@entry4 +; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] { +; TUNIT-NEXT: [[L0:%.*]] = load i32, ptr @GInt4, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L0]]) +; TUNIT-NEXT: store i32 1, ptr @GInt4, align 4 +; TUNIT-NEXT: [[L1:%.*]] = load i32, ptr @GInt4, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L1]]) +; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; TUNIT: T: +; TUNIT-NEXT: store i32 [[V]], ptr @GInt4, align 4 +; TUNIT-NEXT: [[L2:%.*]] = load i32, ptr @GInt4, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L2]]) +; TUNIT-NEXT: br label [[F]] +; TUNIT: F: +; TUNIT-NEXT: [[L3:%.*]] = load i32, ptr @GInt4, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L3]]) +; TUNIT-NEXT: store i32 1, ptr @GInt4, align 4 +; TUNIT-NEXT: [[L4:%.*]] = load i32, ptr @GInt4, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L4]]) +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: norecurse nosync +; CGSCC-LABEL: define {{[^@]+}}@entry4 +; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR6]] { +; CGSCC-NEXT: [[L0:%.*]] = load i32, ptr @GInt4, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L0]]) +; CGSCC-NEXT: store i32 1, ptr @GInt4, align 4 +; CGSCC-NEXT: [[L1:%.*]] = load i32, ptr @GInt4, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L1]]) +; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CGSCC: T: +; CGSCC-NEXT: store i32 [[V]], ptr @GInt4, align 4 +; CGSCC-NEXT: [[L2:%.*]] = load i32, ptr @GInt4, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L2]]) +; CGSCC-NEXT: br label [[F]] +; CGSCC: F: +; CGSCC-NEXT: [[L3:%.*]] = load i32, ptr @GInt4, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L3]]) +; CGSCC-NEXT: store i32 1, ptr @GInt4, align 4 +; CGSCC-NEXT: [[L4:%.*]] = load i32, ptr @GInt4, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L4]]) +; CGSCC-NEXT: ret void +; + %l0 = load i32, ptr @GInt4 + call void @useI32(i32 %l0) + store i32 1, ptr @GInt4 + %l1 = load i32, ptr @GInt4 + call void @useI32(i32 %l1) + br i1 %c, label %T, label %F +T: + store i32 %v, ptr @GInt4 + %l2 = load i32, ptr @GInt4 + call void @useI32(i32 %l2) + br label %F +F: + %l3 = load i32, ptr @GInt4 + call void @useI32(i32 %l3) + store i32 1, ptr @GInt4 + %l4 = load i32, ptr @GInt4 + call void @useI32(i32 %l4) + ret void +} + +; TODO: In this test we can replace %l0, in the others above we cannot. +define void @entry5(i1 %c, i32 %v) { +; TUNIT: Function Attrs: norecurse nosync +; TUNIT-LABEL: define {{[^@]+}}@entry5 +; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] { +; TUNIT-NEXT: [[L0:%.*]] = load i32, ptr @GInt5, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L0]]) +; TUNIT-NEXT: store i32 1, ptr @GInt5, align 4 +; TUNIT-NEXT: [[L1:%.*]] = load i32, ptr @GInt5, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L1]]) #[[ATTR6:[0-9]+]] +; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; TUNIT: T: +; TUNIT-NEXT: store i32 [[V]], ptr @GInt5, align 4 +; TUNIT-NEXT: [[L2:%.*]] = load i32, ptr @GInt5, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L2]]) #[[ATTR6]] +; TUNIT-NEXT: br label [[F]] +; TUNIT: F: +; TUNIT-NEXT: [[L3:%.*]] = load i32, ptr @GInt5, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L3]]) #[[ATTR6]] +; TUNIT-NEXT: store i32 1, ptr @GInt5, align 4 +; TUNIT-NEXT: [[L4:%.*]] = load i32, ptr @GInt5, align 4 +; TUNIT-NEXT: call void @useI32(i32 [[L4]]) #[[ATTR6]] +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: norecurse nosync +; CGSCC-LABEL: define {{[^@]+}}@entry5 +; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR6]] { +; CGSCC-NEXT: [[L0:%.*]] = load i32, ptr @GInt5, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L0]]) +; CGSCC-NEXT: store i32 1, ptr @GInt5, align 4 +; CGSCC-NEXT: [[L1:%.*]] = load i32, ptr @GInt5, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L1]]) #[[ATTR7:[0-9]+]] +; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CGSCC: T: +; CGSCC-NEXT: store i32 [[V]], ptr @GInt5, align 4 +; CGSCC-NEXT: [[L2:%.*]] = load i32, ptr @GInt5, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L2]]) #[[ATTR7]] +; CGSCC-NEXT: br label [[F]] +; CGSCC: F: +; CGSCC-NEXT: [[L3:%.*]] = load i32, ptr @GInt5, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L3]]) #[[ATTR7]] +; CGSCC-NEXT: store i32 1, ptr @GInt5, align 4 +; CGSCC-NEXT: [[L4:%.*]] = load i32, ptr @GInt5, align 4 +; CGSCC-NEXT: call void @useI32(i32 [[L4]]) #[[ATTR7]] +; CGSCC-NEXT: ret void +; + %l0 = load i32, ptr @GInt5 + call void @useI32(i32 %l0) + store i32 1, ptr @GInt5 + %l1 = load i32, ptr @GInt5 + call void @useI32(i32 %l1) nocallback + br i1 %c, label %T, label %F +T: + store i32 %v, ptr @GInt5 + %l2 = load i32, ptr @GInt5 + call void @useI32(i32 %l2) nocallback + br label %F +F: + %l3 = load i32, ptr @GInt5 + call void @useI32(i32 %l3) nocallback + store i32 1, ptr @GInt5 + %l4 = load i32, ptr @GInt5 + call void @useI32(i32 %l4) nocallback + ret void +} + + +declare void @use_4_i8(i8, i8, i8, i8) nocallback + +define void @exclusion_set1(i1 %c1, i1 %c2, i1 %c3) { +; CHECK-LABEL: define {{[^@]+}}@exclusion_set1 +; CHECK-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false) +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 1 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 2 +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 3 +; CHECK-NEXT: [[L0_A:%.*]] = load i8, ptr [[CALL_H2S]], align 1 +; CHECK-NEXT: [[L1_A:%.*]] = load i8, ptr [[GEP1]], align 1 +; CHECK-NEXT: [[L2_A:%.*]] = load i8, ptr [[GEP2]], align 1 +; CHECK-NEXT: [[L3_A:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef [[L0_A]], i8 noundef [[L1_A]], i8 noundef [[L2_A]], i8 noundef [[L3_A]]) +; CHECK-NEXT: store i8 1, ptr [[CALL_H2S]], align 4 +; CHECK-NEXT: [[L1_B:%.*]] = load i8, ptr [[GEP1]], align 1 +; CHECK-NEXT: [[L2_B:%.*]] = load i8, ptr [[GEP2]], align 1 +; CHECK-NEXT: [[L3_B:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_B]], i8 noundef [[L2_B]], i8 noundef [[L3_B]]) +; CHECK-NEXT: br i1 [[C1]], label [[IF_MERGE1:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[L1_C:%.*]] = load i8, ptr [[GEP1]], align 1 +; CHECK-NEXT: [[L2_C:%.*]] = load i8, ptr [[GEP2]], align 1 +; CHECK-NEXT: [[L3_C:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_C]], i8 noundef [[L2_C]], i8 noundef [[L3_C]]) +; CHECK-NEXT: store i8 2, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[L2_D:%.*]] = load i8, ptr [[GEP2]], align 1 +; CHECK-NEXT: [[L3_D:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef [[L2_D]], i8 noundef [[L3_D]]) +; CHECK-NEXT: br i1 [[C1]], label [[IF_MERGE1]], label [[IF_THEN2:%.*]] +; CHECK: if.then2: +; CHECK-NEXT: [[L2_E:%.*]] = load i8, ptr [[GEP2]], align 1 +; CHECK-NEXT: [[L3_E:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef [[L2_E]], i8 noundef [[L3_E]]) +; CHECK-NEXT: store i8 3, ptr [[GEP2]], align 4 +; CHECK-NEXT: [[L3_F:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef [[L3_F]]) +; CHECK-NEXT: br i1 [[C2]], label [[IF_MERGE2:%.*]], label [[IF_THEN3:%.*]] +; CHECK: if.merge1: +; CHECK-NEXT: [[L1_G:%.*]] = load i8, ptr [[GEP1]], align 1 +; CHECK-NEXT: [[L2_G:%.*]] = load i8, ptr [[GEP2]], align 1 +; CHECK-NEXT: [[L3_G:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_G]], i8 noundef [[L2_G]], i8 noundef [[L3_G]]) +; CHECK-NEXT: br label [[IF_MERGE2]] +; CHECK: if.merge2: +; CHECK-NEXT: [[L1_H:%.*]] = load i8, ptr [[GEP1]], align 1 +; CHECK-NEXT: [[L2_H:%.*]] = load i8, ptr [[GEP2]], align 1 +; CHECK-NEXT: [[L3_H:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_H]], i8 noundef [[L2_H]], i8 noundef [[L3_H]]) +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.then3: +; CHECK-NEXT: [[L3_I:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef [[L3_I]]) +; CHECK-NEXT: store i8 4, ptr [[GEP3]], align 4 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef 4) +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[L1_K:%.*]] = load i8, ptr [[GEP1]], align 1 +; CHECK-NEXT: [[L2_K:%.*]] = load i8, ptr [[GEP2]], align 1 +; CHECK-NEXT: [[L3_K:%.*]] = load i8, ptr [[GEP3]], align 1 +; CHECK-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_K]], i8 noundef [[L2_K]], i8 noundef [[L3_K]]) +; CHECK-NEXT: ret void +; +entry: + %call = call noalias i8* @calloc(i64 1, i64 4) norecurse + %gep0 = getelementptr inbounds i8, i8* %call, i64 0 + %gep1 = getelementptr inbounds i8, i8* %call, i64 1 + %gep2 = getelementptr inbounds i8, i8* %call, i64 2 + %gep3 = getelementptr inbounds i8, i8* %call, i64 3 + + %l0_a = load i8, i8* %gep0 + %l1_a = load i8, i8* %gep1 + %l2_a = load i8, i8* %gep2 + %l3_a = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_a, i8 %l1_a, i8 %l2_a, i8 %l3_a) + + store i8 1, i8* %gep0, align 4 + + %l0_b = load i8, i8* %gep0 + %l1_b = load i8, i8* %gep1 + %l2_b = load i8, i8* %gep2 + %l3_b = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_b, i8 %l1_b, i8 %l2_b, i8 %l3_b) + + br i1 %c1, label %if.merge1, label %if.then + +if.then: + %l0_c = load i8, i8* %gep0 + %l1_c = load i8, i8* %gep1 + %l2_c = load i8, i8* %gep2 + %l3_c = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_c, i8 %l1_c, i8 %l2_c, i8 %l3_c) + + store i8 2, i8* %gep1, align 4 + + %l0_d = load i8, i8* %gep0 + %l1_d = load i8, i8* %gep1 + %l2_d = load i8, i8* %gep2 + %l3_d = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_d, i8 %l1_d, i8 %l2_d, i8 %l3_d) + + br i1 %c1, label %if.merge1, label %if.then2 + +if.then2: + %l0_e = load i8, i8* %gep0 + %l1_e = load i8, i8* %gep1 + %l2_e = load i8, i8* %gep2 + %l3_e = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_e, i8 %l1_e, i8 %l2_e, i8 %l3_e) + + store i8 3, i8* %gep2, align 4 + + %l0_f = load i8, i8* %gep0 + %l1_f = load i8, i8* %gep1 + %l2_f = load i8, i8* %gep2 + %l3_f = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_f, i8 %l1_f, i8 %l2_f, i8 %l3_f) + + br i1 %c2, label %if.merge2, label %if.then3 + +if.merge1: + + %l0_g = load i8, i8* %gep0 + %l1_g = load i8, i8* %gep1 + %l2_g = load i8, i8* %gep2 + %l3_g = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_g, i8 %l1_g, i8 %l2_g, i8 %l3_g) + + br label %if.merge2 + +if.merge2: + + %l0_h = load i8, i8* %gep0 + %l1_h = load i8, i8* %gep1 + %l2_h = load i8, i8* %gep2 + %l3_h = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_h, i8 %l1_h, i8 %l2_h, i8 %l3_h) + + br label %if.end + +if.then3: + + %l0_i = load i8, i8* %gep0 + %l1_i = load i8, i8* %gep1 + %l2_i = load i8, i8* %gep2 + %l3_i = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_i, i8 %l1_i, i8 %l2_i, i8 %l3_i) + + store i8 4, i8* %gep3, align 4 + + %l0_j = load i8, i8* %gep0 + %l1_j = load i8, i8* %gep1 + %l2_j = load i8, i8* %gep2 + %l3_j = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_j, i8 %l1_j, i8 %l2_j, i8 %l3_j) + + br label %if.end + +if.end: + %l0_k = load i8, i8* %gep0 + %l1_k = load i8, i8* %gep1 + %l2_k = load i8, i8* %gep2 + %l3_k = load i8, i8* %gep3 + call void @use_4_i8(i8 %l0_k, i8 %l1_k, i8 %l2_k, i8 %l3_k) + + call void @free(i8* %call) norecurse + ret void +} + +define void @exclusion_set2(i1 %c1, i1 %c2, i1 %c3) { +; TUNIT: Function Attrs: norecurse +; TUNIT-LABEL: define {{[^@]+}}@exclusion_set2 +; TUNIT-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR7:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: call void @use_4_i8(i8 1, i8 2, i8 3, i8 4) +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; TUNIT-NEXT: br i1 [[C1]], label [[IF_MERGE1:%.*]], label [[IF_THEN:%.*]] +; TUNIT: if.then: +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 3, i8 4) +; TUNIT-NEXT: br i1 [[C1]], label [[IF_MERGE1]], label [[IF_THEN2:%.*]] +; TUNIT: if.then2: +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 3, i8 4) +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 4) +; TUNIT-NEXT: br i1 [[C2]], label [[IF_MERGE2:%.*]], label [[IF_THEN3:%.*]] +; TUNIT: if.merge1: +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; TUNIT-NEXT: br label [[IF_MERGE2]] +; TUNIT: if.merge2: +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; TUNIT-NEXT: br label [[IF_END:%.*]] +; TUNIT: if.then3: +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 4) +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef 4) +; TUNIT-NEXT: br label [[IF_END]] +; TUNIT: if.end: +; TUNIT-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: norecurse +; CGSCC-LABEL: define {{[^@]+}}@exclusion_set2 +; CGSCC-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR8:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: call void @use_4_i8(i8 1, i8 2, i8 3, i8 4) +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; CGSCC-NEXT: br i1 [[C1]], label [[IF_MERGE1:%.*]], label [[IF_THEN:%.*]] +; CGSCC: if.then: +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 3, i8 4) +; CGSCC-NEXT: br i1 [[C1]], label [[IF_MERGE1]], label [[IF_THEN2:%.*]] +; CGSCC: if.then2: +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 3, i8 4) +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 4) +; CGSCC-NEXT: br i1 [[C2]], label [[IF_MERGE2:%.*]], label [[IF_THEN3:%.*]] +; CGSCC: if.merge1: +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; CGSCC-NEXT: br label [[IF_MERGE2]] +; CGSCC: if.merge2: +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; CGSCC-NEXT: br label [[IF_END:%.*]] +; CGSCC: if.then3: +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 4) +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef 4) +; CGSCC-NEXT: br label [[IF_END]] +; CGSCC: if.end: +; CGSCC-NEXT: call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4) +; CGSCC-NEXT: ret void +; +entry: + %alloc = alloca i8, i32 4 + %gep0 = getelementptr inbounds i8, ptr %alloc, i64 0 + %gep1 = getelementptr inbounds i8, ptr %alloc, i64 1 + %gep2 = getelementptr inbounds i8, ptr %alloc, i64 2 + %gep3 = getelementptr inbounds i8, ptr %alloc, i64 3 + + %l0_a = load i8, ptr %gep0 + %l1_a = load i8, ptr %gep1 + %l2_a = load i8, ptr %gep2 + %l3_a = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_a, i8 %l1_a, i8 %l2_a, i8 %l3_a) + + store i8 1, ptr %gep0, align 4 + + %l0_b = load i8, ptr %gep0 + %l1_b = load i8, ptr %gep1 + %l2_b = load i8, ptr %gep2 + %l3_b = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_b, i8 %l1_b, i8 %l2_b, i8 %l3_b) + + br i1 %c1, label %if.merge1, label %if.then + +if.then: + %l0_c = load i8, ptr %gep0 + %l1_c = load i8, ptr %gep1 + %l2_c = load i8, ptr %gep2 + %l3_c = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_c, i8 %l1_c, i8 %l2_c, i8 %l3_c) + + store i8 2, ptr %gep1, align 4 + + %l0_d = load i8, ptr %gep0 + %l1_d = load i8, ptr %gep1 + %l2_d = load i8, ptr %gep2 + %l3_d = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_d, i8 %l1_d, i8 %l2_d, i8 %l3_d) + + br i1 %c1, label %if.merge1, label %if.then2 + +if.then2: + %l0_e = load i8, ptr %gep0 + %l1_e = load i8, ptr %gep1 + %l2_e = load i8, ptr %gep2 + %l3_e = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_e, i8 %l1_e, i8 %l2_e, i8 %l3_e) + + store i8 3, ptr %gep2, align 4 + + %l0_f = load i8, ptr %gep0 + %l1_f = load i8, ptr %gep1 + %l2_f = load i8, ptr %gep2 + %l3_f = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_f, i8 %l1_f, i8 %l2_f, i8 %l3_f) + + br i1 %c2, label %if.merge2, label %if.then3 + +if.merge1: + + %l0_g = load i8, ptr %gep0 + %l1_g = load i8, ptr %gep1 + %l2_g = load i8, ptr %gep2 + %l3_g = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_g, i8 %l1_g, i8 %l2_g, i8 %l3_g) + + br label %if.merge2 + +if.merge2: + + %l0_h = load i8, ptr %gep0 + %l1_h = load i8, ptr %gep1 + %l2_h = load i8, ptr %gep2 + %l3_h = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_h, i8 %l1_h, i8 %l2_h, i8 %l3_h) + + br label %if.end + +if.then3: + + %l0_i = load i8, ptr %gep0 + %l1_i = load i8, ptr %gep1 + %l2_i = load i8, ptr %gep2 + %l3_i = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_i, i8 %l1_i, i8 %l2_i, i8 %l3_i) + + store i8 4, ptr %gep3, align 4 + + %l0_j = load i8, ptr %gep0 + %l1_j = load i8, ptr %gep1 + %l2_j = load i8, ptr %gep2 + %l3_j = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_j, i8 %l1_j, i8 %l2_j, i8 %l3_j) + + br label %if.end + +if.end: + %l0_k = load i8, ptr %gep0 + %l1_k = load i8, ptr %gep1 + %l2_k = load i8, ptr %gep2 + %l3_k = load i8, ptr %gep3 + call void @use_4_i8(i8 %l0_k, i8 %l1_k, i8 %l2_k, i8 %l3_k) + + ret void +} + +declare void @usei32(i32) nocallback nosync +define internal void @exclusion_set3_helper(i1 %c, ptr %p) { +; TUNIT: Function Attrs: nosync +; TUNIT-LABEL: define {{[^@]+}}@exclusion_set3_helper +; TUNIT-SAME: (i1 [[C:%.*]], ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR8:[0-9]+]] { +; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; TUNIT: t: +; TUNIT-NEXT: store i32 42, ptr [[P]], align 4 +; TUNIT-NEXT: br label [[M:%.*]] +; TUNIT: f: +; TUNIT-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4 +; TUNIT-NEXT: [[ADD:%.*]] = add i32 [[L]], 1 +; TUNIT-NEXT: store i32 [[ADD]], ptr [[P]], align 4 +; TUNIT-NEXT: [[CND:%.*]] = icmp eq i32 [[L]], 100 +; TUNIT-NEXT: br i1 [[CND]], label [[F2:%.*]], label [[F]] +; TUNIT: f2: +; TUNIT-NEXT: [[USE1:%.*]] = load i32, ptr [[P]], align 4 +; TUNIT-NEXT: call void @usei32(i32 [[USE1]]) +; TUNIT-NEXT: store i32 77, ptr [[P]], align 4 +; TUNIT-NEXT: call void @exclusion_set3_helper(i1 noundef true, ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P]]) #[[ATTR8]] +; TUNIT-NEXT: [[USE2:%.*]] = load i32, ptr [[P]], align 4 +; TUNIT-NEXT: call void @usei32(i32 [[USE2]]) +; TUNIT-NEXT: br label [[T]] +; TUNIT: m: +; TUNIT-NEXT: [[USE3:%.*]] = load i32, ptr [[P]], align 4 +; TUNIT-NEXT: call void @usei32(i32 [[USE3]]) +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nosync +; CGSCC-LABEL: define {{[^@]+}}@exclusion_set3_helper +; CGSCC-SAME: (i1 [[C:%.*]], ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR5]] { +; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CGSCC: t: +; CGSCC-NEXT: store i32 42, ptr [[P]], align 4 +; CGSCC-NEXT: br label [[M:%.*]] +; CGSCC: f: +; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4 +; CGSCC-NEXT: [[ADD:%.*]] = add i32 [[L]], 1 +; CGSCC-NEXT: store i32 [[ADD]], ptr [[P]], align 4 +; CGSCC-NEXT: [[CND:%.*]] = icmp eq i32 [[L]], 100 +; CGSCC-NEXT: br i1 [[CND]], label [[F2:%.*]], label [[F]] +; CGSCC: f2: +; CGSCC-NEXT: [[USE1:%.*]] = load i32, ptr [[P]], align 4 +; CGSCC-NEXT: call void @usei32(i32 [[USE1]]) +; CGSCC-NEXT: store i32 77, ptr [[P]], align 4 +; CGSCC-NEXT: call void @exclusion_set3_helper(i1 noundef true, ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P]]) #[[ATTR5]] +; CGSCC-NEXT: [[USE2:%.*]] = load i32, ptr [[P]], align 4 +; CGSCC-NEXT: call void @usei32(i32 [[USE2]]) +; CGSCC-NEXT: br label [[T]] +; CGSCC: m: +; CGSCC-NEXT: [[USE3:%.*]] = load i32, ptr [[P]], align 4 +; CGSCC-NEXT: call void @usei32(i32 [[USE3]]) +; CGSCC-NEXT: ret void +; + br i1 %c, label %t, label %f +t: + store i32 42, ptr %p + br label %m +f: + %l = load i32, ptr %p + %add = add i32 %l, 1 + store i32 %add, ptr %p + %cnd = icmp eq i32 %l, 100 + br i1 %cnd, label %f2, label %f +f2: + %use1 = load i32, ptr %p + call void @usei32(i32 %use1) + store i32 77, ptr %p + call void @exclusion_set3_helper(i1 true, ptr %p) + %use2 = load i32, ptr %p + call void @usei32(i32 %use2) + br label %t +m: + %use3 = load i32, ptr %p + call void @usei32(i32 %use3) + ret void +} + +define i32 @exclusion_set3(i1 %c) { +; TUNIT: Function Attrs: norecurse nosync +; TUNIT-LABEL: define {{[^@]+}}@exclusion_set3 +; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR5]] { +; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 +; TUNIT-NEXT: store i32 3, ptr [[A]], align 4 +; TUNIT-NEXT: call void @exclusion_set3_helper(i1 [[C]], ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR8]] +; TUNIT-NEXT: [[FINAL:%.*]] = load i32, ptr [[A]], align 4 +; TUNIT-NEXT: ret i32 [[FINAL]] +; +; CGSCC: Function Attrs: nosync +; CGSCC-LABEL: define {{[^@]+}}@exclusion_set3 +; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR5]] { +; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 +; CGSCC-NEXT: store i32 3, ptr [[A]], align 4 +; CGSCC-NEXT: call void @exclusion_set3_helper(i1 [[C]], ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CGSCC-NEXT: [[FINAL:%.*]] = load i32, ptr [[A]], align 4 +; CGSCC-NEXT: ret i32 [[FINAL]] +; + %a = alloca i32 + store i32 3, ptr %a + call void @exclusion_set3_helper(i1 %c, ptr %a) + %final = load i32, ptr %a + ret i32 %final +} + +;. +; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback nosync } +; TUNIT: attributes #[[ATTR2:[0-9]+]] = { allockind("free") "alloc-family"="malloc" } +; TUNIT: attributes #[[ATTR3:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR5]] = { norecurse nosync } +; TUNIT: attributes #[[ATTR6]] = { nocallback } +; TUNIT: attributes #[[ATTR7]] = { norecurse } +; TUNIT: attributes #[[ATTR8]] = { nosync } +; TUNIT: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR10]] = { nosync nounwind } +;. +; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback nosync } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { allockind("free") "alloc-family"="malloc" } +; CGSCC: attributes #[[ATTR3:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR5]] = { nosync } +; CGSCC: attributes #[[ATTR6]] = { norecurse nosync } +; CGSCC: attributes #[[ATTR7]] = { nocallback } +; CGSCC: attributes #[[ATTR8]] = { norecurse } +; CGSCC: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR10]] = { nounwind } +;. diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll new file mode 100644 index 0000000..add432d --- /dev/null +++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC + +target triple = "amdgcn-amd-amdhsa" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@G = internal addrspace(3) global i32 undef, align 4 + +; Make sure we do not delete the stores to @G without also replacing the load with `1`. +;. +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 +;. +define void @kernel() "kernel" { +; TUNIT: Function Attrs: norecurse +; TUNIT-LABEL: define {{[^@]+}}@kernel +; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { +; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false) +; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 +; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; TUNIT: if.then: +; TUNIT-NEXT: store i32 1, i32 addrspace(3)* @G, align 4 +; TUNIT-NEXT: br label [[IF_MERGE:%.*]] +; TUNIT: if.else: +; TUNIT-NEXT: call void @barrier() #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4 +; TUNIT-NEXT: call void @use1(i32 [[L]]) #[[ATTR4]] +; TUNIT-NEXT: br label [[IF_MERGE]] +; TUNIT: if.merge: +; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]] +; TUNIT: if.then2: +; TUNIT-NEXT: store i32 2, i32 addrspace(3)* @G, align 4 +; TUNIT-NEXT: call void @barrier() #[[ATTR4]] +; TUNIT-NEXT: br label [[IF_END]] +; TUNIT: if.end: +; TUNIT-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1) +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: norecurse +; CGSCC-LABEL: define {{[^@]+}}@kernel +; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { +; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false) +; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 +; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CGSCC: if.then: +; CGSCC-NEXT: store i32 1, i32 addrspace(3)* @G, align 4 +; CGSCC-NEXT: br label [[IF_MERGE:%.*]] +; CGSCC: if.else: +; CGSCC-NEXT: call void @barrier() +; CGSCC-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4 +; CGSCC-NEXT: call void @use1(i32 [[L]]) +; CGSCC-NEXT: br label [[IF_MERGE]] +; CGSCC: if.merge: +; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]] +; CGSCC: if.then2: +; CGSCC-NEXT: store i32 2, i32 addrspace(3)* @G, align 4 +; CGSCC-NEXT: call void @barrier() +; CGSCC-NEXT: br label [[IF_END]] +; CGSCC: if.end: +; CGSCC-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1) +; CGSCC-NEXT: ret void +; + %call = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false) + %cmp = icmp eq i32 %call, -1 + br i1 %cmp, label %if.then, label %if.else +if.then: + store i32 1, i32 addrspace(3)* @G + br label %if.merge +if.else: + call void @barrier(); + %l = load i32, i32 addrspace(3)* @G + call void @use1(i32 %l) + br label %if.merge +if.merge: + br i1 %cmp, label %if.then2, label %if.end +if.then2: + store i32 2, i32 addrspace(3)* @G + call void @barrier(); + br label %if.end +if.end: + call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1) + ret void +} + +declare void @barrier() norecurse nounwind nocallback +declare void @use1(i32) nosync norecurse nounwind nocallback +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) nocallback +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback + +!llvm.module.flags = !{!0, !1} +!nvvm.annotations = !{!2} + +!0 = !{i32 7, !"openmp", i32 50} +!1 = !{i32 7, !"openmp-device", i32 50} +!2 = !{void ()* @kernel, !"kernel", i32 1} + +;. +; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind } +; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind } +; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback } +; TUNIT: attributes #[[ATTR4]] = { nounwind } +;. +; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" } +; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind } +; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1} +;. +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} -- 2.7.4