#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/AbstractCallSite.h"
/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
/// described in the file comment.
struct Attributor {
+
+ using OptimizationRemarkGetter =
+ function_ref<OptimizationRemarkEmitter &(Function *)>;
+
/// Constructor
///
/// \param Functions The set of functions we are deriving attributes for.
bool RewriteSignatures = true)
: Allocator(InfoCache.Allocator), Functions(Functions),
InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
- DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures) {}
+ DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
+ OREGetter(None), PassName("") {}
+
+ /// Constructor that additionally takes a remark getter and a pass name.
+ ///
+ /// \param Functions The set of functions we are deriving attributes for.
+ /// \param InfoCache Cache to hold various information accessible for
+ /// the abstract attributes.
+ /// \param CGUpdater Helper to update an underlying call graph.
+ /// \param Allowed If not null, a set limiting the attribute opportunities.
+ /// \param DeleteFns Whether to delete functions.
+ /// \param RewriteSignatures Presumably whether to rewrite function signatures.
+ /// \param OREGetter Callback that returns an ORE object for a Function pointer.
+ /// \param PassName The name of the pass emitting remarks.
+ Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
+ CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed,
+ bool DeleteFns, bool RewriteSignatures,
+ OptimizationRemarkGetter OREGetter, const char *PassName)
+ : Allocator(InfoCache.Allocator), Functions(Functions),
+ InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
+ DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
+ OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)),
+ PassName(PassName) {}
~Attributor();
const AbstractAttribute &QueryingAA, const Value &V,
DepClassTy LivenessDepClass = DepClassTy::OPTIONAL);
+ /// Emit a remark anchored at the instruction \p I.
+ ///
+ /// \tparam RemarkKind The remark class to instantiate, one of:
+ ///   - OptimizationRemark to indicate a successful optimization attempt
+ ///   - OptimizationRemarkMissed to report a failed optimization attempt
+ ///   - OptimizationRemarkAnalysis to provide additional information about
+ ///     an optimization attempt
+ ///
+ /// The emitter obtained for the function containing \p I is handed a
+ /// builder that constructs a RemarkKind from the pass name, \p RemarkName
+ /// and \p I, passes it to \p RemarkCB, and returns what \p RemarkCB returns.
+ /// This is a no-op when no remark getter is set.
+ template <typename RemarkKind, typename RemarkCallBack>
+ void emitRemark(Instruction *I, StringRef RemarkName,
+                 RemarkCallBack &&RemarkCB) const {
+   if (OREGetter.hasValue()) {
+     OptimizationRemarkEmitter &Emitter = (*OREGetter)(I->getFunction());
+     Emitter.emit(
+         [&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); });
+   }
+ }
+
+ /// Emit a remark anchored at the function \p F.
+ ///
+ /// The emitter obtained for \p F is handed a builder that constructs a
+ /// RemarkKind from the pass name, \p RemarkName and \p F, passes it to
+ /// \p RemarkCB, and returns what \p RemarkCB returns. This is a no-op when
+ /// no remark getter is set.
+ template <typename RemarkKind, typename RemarkCallBack>
+ void emitRemark(Function *F, StringRef RemarkName,
+                 RemarkCallBack &&RemarkCB) const {
+   if (OREGetter.hasValue()) {
+     OptimizationRemarkEmitter &Emitter = (*OREGetter)(F);
+     Emitter.emit(
+         [&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); });
+   }
+ }
+
/// Helper struct used in the communication between an abstract attribute (AA)
/// that wants to change the signature of a function and the Attributor which
/// applies the changes. The struct is partially initialized with the
SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
///}
+ /// Callback to get an OptimizationRemarkEmitter from a Function *.
+ Optional<OptimizationRemarkGetter> OREGetter;
+
+ /// The name of the pass to emit remarks for.
+ const char *PassName = "";
+
friend AADepGraph;
friend AttributorCallGraph;
};
LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
<< "\n");
+ // Build the success remark. Calls identified as __kmpc_alloc_shared get a
+ // dedicated message; every other allocation gets the generic one.
+ auto Remark = [&](OptimizationRemark OR) {
+   LibFunc IsAllocShared;
+   // getLibFunc reports through its return value whether the callee was
+   // recognized; IsAllocShared must not be read when it returns false
+   // because it may still be uninitialized.
+   if (auto *CB = dyn_cast<CallBase>(MallocCall))
+     if (TLI->getLibFunc(*CB, IsAllocShared) &&
+         IsAllocShared == LibFunc___kmpc_alloc_shared)
+       return OR << "Moving globalized variable to the stack.";
+   return OR << "Moving memory allocation from the heap to the stack.";
+ };
+ A.emitRemark<OptimizationRemark>(MallocCall, "HeapToStack", Remark);
+
Align Alignment;
Value *Size;
if (isCallocLikeFn(MallocCall, TLI)) {
if (!NoCaptureAA.isAssumedNoCapture() ||
!ArgNoFreeAA.isAssumedNoFree()) {
+
+ // Emit a missed remark if this is missed OpenMP globalization. The
+ // message names which of the two failed checks blocked the transform,
+ // preferring "captured" when the no-capture check failed.
+ auto Remark = [&](OptimizationRemarkMissed ORM) {
+   return ORM << "Could not move globalized variable to the stack. "
+              << "Variable is potentially "
+              << ((!NoCaptureAA.isAssumedNoCapture()) ? "captured."
+                                                      : "freed.");
+ };
+
+ // Only report calls that are positively identified as
+ // __kmpc_alloc_shared. getLibFunc reports through its return value
+ // whether the callee was recognized; IsAllocShared must not be read when
+ // it returns false because it may still be uninitialized.
+ LibFunc IsAllocShared;
+ if (auto *AllocShared = dyn_cast<CallBase>(&I)) {
+   if (TLI->getLibFunc(*AllocShared, IsAllocShared) &&
+       IsAllocShared == LibFunc___kmpc_alloc_shared)
+     A.emitRemark<OptimizationRemarkMissed>(
+         AllocShared, "HeapToStackFailed", Remark);
+ }
+
LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
ValidUsesOnly = false;
}
auto *NewBuffer =
ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
+ // Report the replacement together with the number of bytes involved,
+ // using the singular unit only for a one-byte allocation.
+ auto Remark = [&](OptimizationRemark OR) {
+   const uint64_t NumBytes = AllocSize->getZExtValue();
+   const char *Unit = (NumBytes == 1) ? " byte " : " bytes ";
+   return OR << "Replaced globalized variable with "
+             << ore::NV("SharedMemory", NumBytes) << Unit
+             << "of shared memory";
+ };
+ A.emitRemark<OptimizationRemark>(CB, "OpenMPReplaceGlobalization",
+                                  Remark);
+
SharedMem->setAlignment(MaybeAlign(32));
A.changeValueAfterManifest(*CB, *NewBuffer);
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions,
OMPInModule.getKernels());
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false);
+ Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, OREGetter,
+ DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(true);
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
/*CGSCC*/ Functions, OMPInModule.getKernels());
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, false);
+ Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, OREGetter,
+ DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(false);
*(Functions.back()->getParent()), AG, Allocator,
/*CGSCC*/ Functions, OMPInModule.getKernels());
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, false);
+ Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
+ OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
return OMPOpt.run(false);
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
+; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
+; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured.
+; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack.
+
@S = external local_unnamed_addr global i8*
define void @kernel() {
; CHECK-NEXT: ret void
;
entry:
- %0 = call i8* @__kmpc_alloc_shared(i64 4)
+ %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !9
call void @use(i8* %0)
call void @__kmpc_free_shared(i8* %0)
ret void
; CHECK-NEXT: ret void
;
entry:
- %0 = call i8* @__kmpc_alloc_shared(i64 4)
+ %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
call void @share(i8* %0)
call void @__kmpc_free_shared(i8* %0)
ret void
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{void ()* @kernel, !"kernel", i32 1}
+!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 2, column: 2, scope: !6)
+!10 = !DILocation(line: 4, column: 2, scope: !7)
; RUN: opt -S -passes='openmp-opt' < %s | FileCheck %s
+; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
@S = external local_unnamed_addr global i8*
+; CHECK-REMARKS: remark: replace_globalization.c:5:7: Replaced globalized variable with 16 bytes of shared memory
+; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory
; CHECK: [[SHARED_X:@.+]] = internal addrspace(3) global [16 x i8] undef
; CHECK: [[SHARED_Y:@.+]] = internal addrspace(3) global [4 x i8] undef
define void @use(i8* %x) {
entry:
- %addr = alloca i8*
store i8* %x, i8** @S
ret void
}