const auto &NoSyncAA = A.getAAFor<AANoSync>(
QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
- IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
+ IRPosition::function(Scope), &QueryingAA, DepClassTy::NONE);
bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();
bool InstIsExecutedByInitialThreadOnly =
ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I);
bool InstIsExecutedInAlignedRegion =
ExecDomainAA && ExecDomainAA->isExecutedInAlignedRegion(A, I);
+ if (InstIsExecutedInAlignedRegion || InstIsExecutedByInitialThreadOnly)
+ A.recordDependence(*ExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
InformationCache &InfoCache = A.getInfoCache();
bool IsThreadLocalObj =
// Predicate over an instruction I. Returns true when IsThreadLocalObj or
// AllInSameNoSyncFn holds, or when an AAExecutionDomain covering I's function
// confirms one of the execution properties checked below; otherwise false.
auto CanIgnoreThreadingForInst = [&](const Instruction &I) -> bool {
// These two flags decide the answer without consulting any execution domain.
if (IsThreadLocalObj || AllInSameNoSyncFn)
return true;
// Choose the execution-domain AA by I's own function: ExecDomainAA is reused
// only when I is in Scope, otherwise one is looked up for I.getFunction().
// NOTE(review): presumably needed because isExecutedInAlignedRegion asserts
// that the instruction is in the AA's anchor scope - confirm.
// The lookup uses DepClassTy::NONE; a dependence is recorded explicitly below
// only on the paths that return true.
- if (!ExecDomainAA)
+ const auto *FnExecDomainAA =
+ I.getFunction() == &Scope
+ ? ExecDomainAA
+ : A.lookupAAFor<AAExecutionDomain>(
+ IRPosition::function(*I.getFunction()), &QueryingAA,
+ DepClassTy::NONE);
+ if (!FnExecDomainAA)
return false;
// If InstIsExecutedInAlignedRegion is already true, the || short-circuits and
// FnExecDomainAA is not queried here; the dependence on FnExecDomainAA is
// recorded in either case.
if (InstIsExecutedInAlignedRegion ||
- ExecDomainAA->isExecutedInAlignedRegion(A, I))
+ FnExecDomainAA->isExecutedInAlignedRegion(A, I)) {
+ A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
+ }
// Needs both the precomputed InstIsExecutedByInitialThreadOnly flag and
// FnExecDomainAA's answer for I.
if (InstIsExecutedByInitialThreadOnly &&
- ExecDomainAA->isExecutedByInitialThreadOnly(I))
+ FnExecDomainAA->isExecutedByInitialThreadOnly(I)) {
+ A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
+ }
return false;
};
bool isExecutedInAlignedRegion(Attributor &A,
const Instruction &I) const override {
+ assert(I.getFunction() == getAnchorScope() &&
+ "Instruction is out of scope!");
if (!isValidState() || isa<CallBase>(I))
return false;
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals --include-generated-funcs
+; RUN: opt -passes=openmp-opt -S < %s | FileCheck %s --check-prefixes=CHECK
+
+%"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr }
+%"struct.ompx::state::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 }
+
+@_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
+
+; Kernel entry point. Its body is a single call to @__kmpc_target_init whose
+; result is unused, followed by a return.
+define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
+ %1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
+ ret void
+}
+
+; Stores to the addrspace(3) team-state global, then calls
+; @__kmpc_kernel_parallel, which loads from the same global in a different
+; function. The CHECK lines below expect this body to contain only `ret i32 0`.
+define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) {
+ store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
+ %4 = call i1 @__kmpc_kernel_parallel()
+ ret i32 0
+}
+
+; Loads from the addrspace(3) team-state global; the loaded value is unused and
+; the function always returns false. No CHECK-LABEL for this function appears
+; in the expected output below.
+define internal i1 @__kmpc_kernel_parallel() {
+ %1 = load ptr, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 8
+ ret i1 false
+}
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 7, !"openmp", i32 50}
+;.
+; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
+;.
+; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
+; CHECK-NEXT: ret void
+;
+;
+; CHECK: Function Attrs: norecurse nosync nounwind memory(write)
+; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret i32 0
+;
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse nosync nounwind memory(write) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+;.