bool RewriteSignatures = true;
/// Flag to determine if we want to initialize all default AAs for an internal
- /// function marked live.
- /// TODO: This should probably be a callback, or maybe
- /// identifyDefaultAbstractAttributes should be virtual, something to allow
- /// customizable lazy initialization for internal functions.
+  /// function marked live. See also: InitializationCallback.
bool DefaultInitializeLiveInternals = true;
+ /// Callback function to be invoked on internal functions marked live.
+ std::function<void(Attributor &A, const Function &F)> InitializationCallback =
+ nullptr;
+
/// Helper to update an underlying call graph and to delete functions.
CallGraphUpdater &CGUpdater;
if (Configuration.DefaultInitializeLiveInternals)
identifyDefaultAbstractAttributes(const_cast<Function &>(F));
+ if (Configuration.InitializationCallback)
+ Configuration.InitializationCallback(*this, F);
}
/// Helper function to remove callsite.
void registerFoldRuntimeCall(RuntimeFunction RF);
/// Populate the Attributor with abstract attribute opportunities in the
- /// function.
+ /// functions.
void registerAAs(bool IsModulePass);
+
+public:
+ /// Callback to register AAs for live functions, including internal functions
+ /// marked live during the traversal.
+ static void registerAAsForFunction(Attributor &A, const Function &F);
};
Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
if (F->isDeclaration())
continue;
- if (!DisableOpenMPOptDeglobalization)
- A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
- A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
- if (!DisableOpenMPOptDeglobalization)
- A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
-
- for (auto &I : instructions(*F)) {
- if (auto *LI = dyn_cast<LoadInst>(&I)) {
- bool UsedAssumedInformation = false;
- A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
- UsedAssumedInformation, AA::Interprocedural);
- } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
- A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
- }
+  // We look at internal functions only on-demand but if any use is not a
+  // direct call or outside the current set of analyzed functions, we have
+  // to do it eagerly.
+  if (F->hasLocalLinkage()) {
+    if (llvm::all_of(F->uses(), [this](const Use &U) {
+          const auto *CB = dyn_cast<CallBase>(U.getUser());
+          // Defer AA registration only if every use is a direct call from a
+          // function the Attributor is run on; such callees are picked up
+          // lazily via the InitializationCallback when they become live.
+          return CB && CB->isCallee(&U) &&
+                 A.isRunOn(const_cast<Function *>(CB->getCaller()));
+        }))
+      continue;
+  }
+ registerAAsForFunction(A, *F);
+ }
+}
+
+/// Register the OpenMP-specific abstract attributes we want for \p F with
+/// Attributor \p A.
+///
+/// Installed as AttributorConfig::InitializationCallback (see the pass
+/// entry points) so internal functions marked live during the fixpoint
+/// iteration receive the same AAs as the eagerly seeded functions.
+void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
+  if (!DisableOpenMPOptDeglobalization)
+    A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
+  A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(F));
+  if (!DisableOpenMPOptDeglobalization)
+    A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(F));
+
+  // Seed interprocedural value simplification for every load and a liveness
+  // AA for every store so later deductions can build on them.
+  for (auto &I : instructions(F)) {
+    if (auto *LI = dyn_cast<LoadInst>(&I)) {
+      bool UsedAssumedInformation = false;
+      A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
+                             UsedAssumedInformation, AA::Interprocedural);
+    } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
    }
  }
}
}
// Look at every function in the Module unless it was internalized.
+ SetVector<Function *> Functions;
SmallVector<Function *, 16> SCC;
for (Function &F : M)
- if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
+ if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) {
SCC.push_back(&F);
+ Functions.insert(&F);
+ }
if (SCC.empty())
return PreservedAnalyses::all();
AttributorConfig AC(CGUpdater);
AC.DefaultInitializeLiveInternals = false;
+ AC.IsModulePass = true;
AC.RewriteSignatures = false;
AC.MaxFixpointIterations = MaxFixpointIterations;
AC.OREGetter = OREGetter;
AC.PassName = DEBUG_TYPE;
+ AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
- SetVector<Function *> Functions;
Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
AC.MaxFixpointIterations = MaxFixpointIterations;
AC.OREGetter = OREGetter;
AC.PassName = DEBUG_TYPE;
+ AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
Attributor A(Functions, InfoCache, AC);
; CHECK-NEXT: ret void
;
;
-; CHECK: Function Attrs: norecurse nounwind memory(none)
-; CHECK-LABEL: define {{[^@]+}}@g
-; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: ret double 0.000000e+00
-;
-;
-; CHECK: Function Attrs: norecurse nosync nounwind memory(none)
-; CHECK-LABEL: define {{[^@]+}}@h.internalized
-; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i64 undef
-; CHECK-NEXT: ret ptr [[TMP2]]
-;
-;
; CHECK-LABEL: define {{[^@]+}}@h
; CHECK-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP0]], align 4
;
;.
; CHECK: attributes #[[ATTR0]] = { norecurse nounwind memory(none) }
-; CHECK: attributes #[[ATTR1]] = { norecurse nosync nounwind memory(none) }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
-define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
+define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
entry:
%call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1
%call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
-; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR2]]
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
-; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
-; CHECK-NEXT: tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR2]]
+; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
+; CHECK-NEXT: tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR1]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
;
-; CHECK: Function Attrs: norecurse
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor
-; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]]
-; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]]
+; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0]]
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: ret void
!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
-; MODULE-LABEL: define {{[^@]+}}@nblist
-; MODULE-SAME: () #[[ATTR0:[0-9]+]] {
-; MODULE-NEXT: [[TMP1:%.*]] = call ptr @alloc()
-; MODULE-NEXT: call fastcc void @rec.internalized(ptr [[TMP1]], i64 0)
-; MODULE-NEXT: ret i32 0
-;
-;
-; MODULE-LABEL: define {{[^@]+}}@rec.internalized
-; MODULE-SAME: (ptr nocapture writeonly [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
-; MODULE-NEXT: call fastcc void @rec.internalized(ptr nocapture writeonly [[TMP0]], i64 0) #[[ATTR2:[0-9]+]]
-; MODULE-NEXT: ret void
-;
-;
; MODULE-LABEL: define {{[^@]+}}@rec
; MODULE-SAME: (ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
; MODULE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[TMP1]]
ret void
}
+; CHECK: [openmp-opt] Basic block @foo entry is executed by a single thread.
+; Function Attrs: noinline
+define internal void @foo() {
+entry:
+ ret void
+}
+
; CHECK-NOT: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
; CHECK-DAG: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
ret void
}
-; CHECK: [openmp-opt] Basic block @foo entry is executed by a single thread.
-; Function Attrs: noinline
-define internal void @foo() {
-entry:
- ret void
-}
-
; CHECK: [openmp-opt] Basic block @bar.internalized entry is executed by a single thread.
; Function Attrs: noinline
define void @bar() {