unsigned getAssumedAddrSpace(const Value *V) const;
/// Query whether the underlying TTI implementation reports a single-threaded
/// thread model for the target.
+ bool isSingleThreaded() const;
+
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const;
virtual bool
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
// Pure-virtual counterpart of isSingleThreaded(); must be supplied by the
// implementing class.
+ virtual bool isSingleThreaded() const = 0;
virtual std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const = 0;
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
return Impl.getAssumedAddrSpace(V);
}
// Forwards the query unchanged to the wrapped implementation object (Impl).
+ bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
+
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override {
return Impl.getPredicatedAddrSpace(V);
unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
// Default answer: unconditionally reports "not single-threaded".
+ bool isSingleThreaded() const { return false; }
+
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const {
return std::make_pair(nullptr, -1);
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
}
// Reports single-threaded exactly when the target machine's options select
// ThreadModel::Single.
+ bool isSingleThreaded() const {
+ return getTLI()->getTargetMachine().Options.ThreadModel ==
+ ThreadModel::Single;
+ }
+
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const {
return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC,
- const TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *,
- OptimizationRemarkEmitter *, bool AllowSpeculation);
+ const TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+ MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *,
+ bool AllowSpeculation);
/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
return TTIImpl->getAssumedAddrSpace(V);
}
// Thin forwarding wrapper: the answer comes entirely from TTIImpl.
+bool TargetTransformInfo::isSingleThreaded() const {
+ return TTIImpl->isSingleThreaded();
+}
+
std::pair<const Value *, unsigned>
TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
return TTIImpl->getPredicatedAddrSpace(V);
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
// Hidden flag, off by default. When set, isThreadLocalObject() returns true
// for every object regardless of what TTI reports, which lets tests exercise
// the single-threaded promotion path without a target that selects it.
+static cl::opt<bool>
+ SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
+ cl::desc("Force thread model single in LICM pass"));
+
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
collectPromotionCandidates(MSSA, AA, L)) {
LocalPromoted |= promoteLoopAccessesToScalars(
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
- DT, AC, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
+ DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
+ LicmAllowSpeculation);
}
Promoted |= LocalPromoted;
} while (LocalPromoted);
if (auto *A = dyn_cast<Argument>(Object))
return A->hasByValAttr();
+ if (auto *G = dyn_cast<GlobalVariable>(Object))
+ return !G->isConstant();
+
// TODO: Noalias has nothing to do with writability, this should check for
// an allocator function.
return isNoAliasCall(Object);
}
-bool isThreadLocalObject(const Value *Object, const Loop *L,
- DominatorTree *DT) {
+bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
+ TargetTransformInfo *TTI) {
// Returns true in either of two cases:
//  (a) the object is an identified function-local object that is not
//      captured before or in the loop, or
//  (b) TTI reports a single-threaded target, or the
//      -licm-force-thread-model-single option is set.
// NOTE(review): in case (b) the object need not be function-local at all, so
// the name "isThreadLocalObject" no longer describes the full condition —
// consider renaming. TTI is dereferenced unconditionally; callers presumably
// always pass a non-null pointer — confirm.
- return isIdentifiedFunctionLocal(Object) &&
- isNotCapturedBeforeOrInLoop(Object, L, DT);
+ return (isIdentifiedFunctionLocal(Object) &&
+ isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
+ (TTI->isSingleThreaded() || SingleThread);
}
} // namespace
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
- const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater &MSSAU,
- ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE,
- bool AllowSpeculation) {
+ const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
+ MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
// violating the memory model.
if (StoreSafety == StoreSafetyUnknown) {
Value *Object = getUnderlyingObject(SomePtr);
- if (isWritableObject(Object) && isThreadLocalObject(Object, CurLoop, DT))
+ if (isWritableObject(Object) &&
+ isThreadLocalObject(Object, CurLoop, DT, TTI))
StoreSafety = StoreSafe;
}
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -licm < %s | FileCheck %s
+; RUN: opt -S -licm < %s | FileCheck %s --check-prefixes=CHECK,MT
+; RUN: opt -S -licm -licm-force-thread-model-single < %s | FileCheck %s --check-prefixes=CHECK,ST
@g = external global i32
@c = external constant i32
; mode only loads can be promoted, as a different thread might write to the
; global.
define void @promote_global(i1 %c, i1 %c2) {
-; CHECK-LABEL: @promote_global(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
-; CHECK: if:
-; CHECK-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
-; CHECK-NEXT: store i32 [[V_INC]], ptr @g, align 4
-; CHECK-NEXT: br label [[LATCH]]
-; CHECK: latch:
-; CHECK-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
-; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
+; MT-LABEL: @promote_global(
+; MT-NEXT: entry:
+; MT-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
+; MT-NEXT: br label [[LOOP:%.*]]
+; MT: loop:
+; MT-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
+; MT-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; MT: if:
+; MT-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; MT-NEXT: store i32 [[V_INC]], ptr @g, align 4
+; MT-NEXT: br label [[LATCH]]
+; MT: latch:
+; MT-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; MT-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; MT: exit:
+; MT-NEXT: ret void
+;
+; ST-LABEL: @promote_global(
+; ST-NEXT: entry:
+; ST-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
+; ST-NEXT: br label [[LOOP:%.*]]
+; ST: loop:
+; ST-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
+; ST-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; ST: if:
+; ST-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; ST-NEXT: br label [[LATCH]]
+; ST: latch:
+; ST-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; ST-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; ST: exit:
+; ST-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
+; ST-NEXT: store i32 [[V_INC1_LCSSA]], ptr @g, align 4
+; ST-NEXT: ret void
;
entry:
br label %loop
; mode only loads can be promoted, as a different thread might write to the
; captured alloca.
define void @promote_captured_alloca(i1 %c, i1 %c2) {
-; CHECK-LABEL: @promote_captured_alloca(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: call void @capture(ptr [[A]])
-; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
-; CHECK: if:
-; CHECK-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
-; CHECK-NEXT: store i32 [[V_INC]], ptr [[A]], align 4
-; CHECK-NEXT: br label [[LATCH]]
-; CHECK: latch:
-; CHECK-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
-; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
+; MT-LABEL: @promote_captured_alloca(
+; MT-NEXT: entry:
+; MT-NEXT: [[A:%.*]] = alloca i32, align 4
+; MT-NEXT: call void @capture(ptr [[A]])
+; MT-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
+; MT-NEXT: br label [[LOOP:%.*]]
+; MT: loop:
+; MT-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
+; MT-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; MT: if:
+; MT-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; MT-NEXT: store i32 [[V_INC]], ptr [[A]], align 4
+; MT-NEXT: br label [[LATCH]]
+; MT: latch:
+; MT-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; MT-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; MT: exit:
+; MT-NEXT: ret void
+;
+; ST-LABEL: @promote_captured_alloca(
+; ST-NEXT: entry:
+; ST-NEXT: [[A:%.*]] = alloca i32, align 4
+; ST-NEXT: call void @capture(ptr [[A]])
+; ST-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
+; ST-NEXT: br label [[LOOP:%.*]]
+; ST: loop:
+; ST-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
+; ST-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; ST: if:
+; ST-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; ST-NEXT: br label [[LATCH]]
+; ST: latch:
+; ST-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; ST-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; ST: exit:
+; ST-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
+; ST-NEXT: store i32 [[V_INC1_LCSSA]], ptr [[A]], align 4
+; ST-NEXT: ret void
;
entry:
%a = alloca i32