From 18a2975b57830a231e2b8f0299969edfc4f8477c Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 2 Feb 2023 01:06:00 -0800 Subject: [PATCH] [Attributor][FIX] Ensure we use the right AAExecutionDomain Before we might have ended up queriying the AAExecutionDomain of a different function, which resulted in wrong optimistic results. Partially fixes https://github.com/llvm/llvm-project/issues/60425 --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 20 ++++++++-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 + .../reduced/aa_execution_domain_wrong_fn.ll | 46 ++++++++++++++++++++++ 3 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index d6229d7..9c1e046 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1043,12 +1043,14 @@ struct AAPointerInfoImpl const auto &NoSyncAA = A.getAAFor( QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); const auto *ExecDomainAA = A.lookupAAFor( - IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL); + IRPosition::function(Scope), &QueryingAA, DepClassTy::NONE); bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync(); bool InstIsExecutedByInitialThreadOnly = ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I); bool InstIsExecutedInAlignedRegion = ExecDomainAA && ExecDomainAA->isExecutedInAlignedRegion(A, I); + if (InstIsExecutedInAlignedRegion || InstIsExecutedByInitialThreadOnly) + A.recordDependence(*ExecDomainAA, QueryingAA, DepClassTy::OPTIONAL); InformationCache &InfoCache = A.getInfoCache(); bool IsThreadLocalObj = @@ -1063,14 +1065,24 @@ struct AAPointerInfoImpl auto CanIgnoreThreadingForInst = [&](const Instruction &I) -> bool { if (IsThreadLocalObj || AllInSameNoSyncFn) return true; - if (!ExecDomainAA) + const auto *FnExecDomainAA = + I.getFunction() == &Scope + ? ExecDomainAA + : A.lookupAAFor( + IRPosition::function(*I.getFunction()), &QueryingAA, + DepClassTy::NONE); + if (!FnExecDomainAA) return false; if (InstIsExecutedInAlignedRegion || - ExecDomainAA->isExecutedInAlignedRegion(A, I)) + FnExecDomainAA->isExecutedInAlignedRegion(A, I)) { + A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL); return true; + } if (InstIsExecutedByInitialThreadOnly && - ExecDomainAA->isExecutedByInitialThreadOnly(I)) + FnExecDomainAA->isExecutedByInitialThreadOnly(I)) { + A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL); return true; + } return false; }; diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 0f18d41..be2c2b7 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2678,6 +2678,8 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { bool isExecutedInAlignedRegion(Attributor &A, const Instruction &I) const override { + assert(I.getFunction() == getAnchorScope() && + "Instruction is out of scope!"); if (!isValidState() || isa(I)) return false; diff --git a/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll new file mode 100644 index 0000000..ed36ebc --- /dev/null +++ b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals --include-generated-funcs +; RUN: opt -passes=openmp-opt -S < %s | FileCheck %s --check-prefixes=CHECK + +%"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr } +%"struct.ompx::state::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 } + +@_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef + +define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() { + %1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false) + ret void +} + +define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) { + store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 + %4 = call i1 @__kmpc_kernel_parallel() + ret i32 0 +} + +define internal i1 @__kmpc_kernel_parallel() { + %1 = load ptr, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 8 + ret i1 false +} + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} +;. +; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef +;. +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() { +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false) +; CHECK-NEXT: ret void +; +; +; CHECK: Function Attrs: norecurse nosync nounwind memory(write) +; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret i32 0 +; +;. +; CHECK: attributes #[[ATTR0]] = { norecurse nosync nounwind memory(write) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} +;. -- 2.7.4