#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
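+ // Whether the new alloca can be hoisted into the entry block depends on
+ // the allocation call not being executed repeatedly, so fetch LoopInfo to
+ // ask whether the call site sits inside a loop.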
+ LoopInfo *LI =
+ A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F);
+ Optional<bool> MayContainIrreducibleControl;
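+ // LoopInfo does not model irreducible cycles, so once irreducible control
+ // flow is detected (computed lazily, at most once) every block is
+ // conservatively treated as if it were inside a loop.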
+ auto IsInLoop = [&](BasicBlock &BB) {
+ if (!MayContainIrreducibleControl.has_value())
+ MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI);
+ if (MayContainIrreducibleControl.value())
+ return true;
+ return LI->getLoopFor(&BB) != nullptr;
+ };
+
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
  continue;

Value *Size;
Optional<APInt> SizeAPI = getSize(A, *this, AI);
if (SizeAPI) {
  Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI);
} else {
  LLVMContext &Ctx = AI.CB->getContext();
  ObjectSizeOpts Opts;
  ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, Opts);
  SizeOffsetEvalType SizeOffsetPair = Eval.compute(AI.CB);
  Size = SizeOffsetPair.first;
}
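+ // Hoist the alloca into the entry block only if the size is known up
+ // front (an entry-block alloca cannot reference the dynamically computed
+ // Size) and the allocation call is not inside a loop; otherwise it stays
+ // at the original call site.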
+ Instruction *IP = (!SizeAPI.has_value() || IsInLoop(*AI.CB->getParent()))
+ ? AI.CB
+ : &F->getEntryBlock().front();
+
Align Alignment(1);
if (MaybeAlign RetAlign = AI.CB->getRetAlign())
Alignment = std::max(Alignment, *RetAlign);
- // TODO: Hoist the alloca towards the function entry.
unsigned AS = DL.getAllocaAddrSpace();
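+ // Create the replacement alloca at IP: in the entry block when hoisting
+ // is possible, at the original allocation call otherwise.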
Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
- Size, Alignment, "", AI.CB);
+ Size, Alignment, "", IP);
if (Alloca->getType() != AI.CB->getType())
Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
    Alloca, AI.CB->getType(), "malloc_cast", AI.CB);
ret void
}
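+
+; The allocation below has a constant size (4 bytes) and is not inside a
+; loop, so the new-PM run is expected to hoist the replacement alloca into
+; the entry block even though the __kmpc_alloc_shared call is in %not_entry.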
+define void @move_alloca() {
+; IS________OPM-LABEL: define {{[^@]+}}@move_alloca() {
+; IS________OPM-NEXT: entry:
+; IS________OPM-NEXT: br label [[NOT_ENTRY:%.*]]
+; IS________OPM: not_entry:
+; IS________OPM-NEXT: [[TMP0:%.*]] = tail call noalias i8* @__kmpc_alloc_shared(i64 noundef 4)
+; IS________OPM-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[TMP0]]) #[[ATTR6]]
+; IS________OPM-NEXT: tail call void @__kmpc_free_shared(i8* noalias nocapture [[TMP0]], i64 noundef 4)
+; IS________OPM-NEXT: ret void
+;
+; IS________NPM-LABEL: define {{[^@]+}}@move_alloca() {
+; IS________NPM-NEXT: entry:
+; IS________NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1, addrspace(5)
+; IS________NPM-NEXT: br label [[NOT_ENTRY:%.*]]
+; IS________NPM: not_entry:
+; IS________NPM-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8*
+; IS________NPM-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR6]]
+; IS________NPM-NEXT: ret void
+;
+entry:
+ br label %not_entry
+
+not_entry:
+ %0 = tail call noalias i8* @__kmpc_alloc_shared(i64 4)
+ tail call void @usei8(i8* nocapture nofree %0) willreturn nounwind nosync
+ tail call void @__kmpc_free_shared(i8* %0, i64 4)
+ ret void
+}
+
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind willreturn }
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test_nested_memory
; IS__TUNIT_NPM-SAME: (float* nocapture nofree writeonly [[DST:%.*]], double* nocapture nofree readonly [[SRC:%.*]]) {
; IS__TUNIT_NPM-NEXT: entry:
+; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 24, align 1
; IS__TUNIT_NPM-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
-; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast %struct.STy* [[LOCAL]] to i8*
+; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast %struct.STy* [[LOCAL]] to i8*
; IS__TUNIT_NPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[LOCAL]], i64 0, i32 2
-; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 24, align 1
-; IS__TUNIT_NPM-NEXT: [[DST1:%.*]] = bitcast i8* [[TMP1]] to float**
+; IS__TUNIT_NPM-NEXT: [[DST1:%.*]] = bitcast i8* [[TMP0]] to float**
; IS__TUNIT_NPM-NEXT: store float* [[DST]], float** [[DST1]], align 8
-; IS__TUNIT_NPM-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 8
+; IS__TUNIT_NPM-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 8
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC2]] to double**
; IS__TUNIT_NPM-NEXT: store double* [[SRC]], double** [[TMP2]], align 8
-; IS__TUNIT_NPM-NEXT: store i8* [[TMP1]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8
+; IS__TUNIT_NPM-NEXT: store i8* [[TMP0]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8
; IS__TUNIT_NPM-NEXT: call fastcc void @nested_memory_callee() #[[ATTR15:[0-9]+]]
; IS__TUNIT_NPM-NEXT: ret void
;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8*
; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32*
; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32*
; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8*
; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32*
; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32*
; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]