}
}
- void buildRecordForGlobalizedVars(bool IsInTargetMasterThreadRegion) {
+ void buildRecordForGlobalizedVars(bool IsInTTDRegion) {
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
- if (IsInTargetMasterThreadRegion)
+ if (IsInTTDRegion)
EscapedDeclsForTeams = EscapedDecls.getArrayRef();
else
EscapedDeclsForParallel = EscapedDecls.getArrayRef();
/// Returns the record that handles all the escaped local variables and used
/// instead of their original storage.
- const RecordDecl *getGlobalizedRecord(bool IsInTargetMasterThreadRegion) {
+ const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {
if (!GlobalizedRD)
- buildRecordForGlobalizedVars(IsInTargetMasterThreadRegion);
+ buildRecordForGlobalizedVars(IsInTTDRegion);
return GlobalizedRD;
}
}
} Action(EST, WST);
CodeGen.setAction(Action);
+ IsInTTDRegion = true;
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
+ IsInTTDRegion = false;
// Now change the name of the worker function to correspond to this target
// region's entry function.
}
} Action(*this, EST, D);
CodeGen.setAction(Action);
+ IsInTTDRegion = true;
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
+ IsInTTDRegion = false;
}
static void
}
} Action(IsInParallelRegion);
CodeGen.setAction(Action);
+ bool PrevIsInTTDRegion = IsInTTDRegion;
+ IsInTTDRegion = false;
bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
IsInTargetMasterThreadRegion = false;
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
+ IsInTTDRegion = PrevIsInTTDRegion;
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
!IsInParallelRegion) {
llvm::Function *WrapperFun =
CheckVarsEscapingDeclContext VarChecker(CGF);
VarChecker.Visit(Body);
const RecordDecl *GlobalizedVarsRecord =
- VarChecker.getGlobalizedRecord(IsInTargetMasterThreadRegion);
+ VarChecker.getGlobalizedRecord(IsInTTDRegion);
ArrayRef<const ValueDecl *> EscapedVariableLengthDecls =
VarChecker.getEscapedVariableLengthDecls();
if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
assert(VD->isCanonicalDecl() && "Expected canonical declaration");
const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
- Data.insert(
- std::make_pair(VD, MappedVarData(FD, IsInTargetMasterThreadRegion)));
+ Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
}
- if (!IsInTargetMasterThreadRegion && !NeedToDelayGlobalization &&
- !IsInParallelRegion) {
+ if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
CheckVarsEscapingDeclContext VarChecker(CGF);
VarChecker.Visit(Body);
I->getSecond().SecondaryGlobalRecord =
- VarChecker.getGlobalizedRecord(/*IsInTargetMasterThreadRegion=*/true);
+ VarChecker.getGlobalizedRecord(/*IsInTTDRegion=*/true);
I->getSecond().SecondaryLocalVarData.emplace();
DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
assert(VD->isCanonicalDecl() && "Expected canonical declaration");
const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
- Data.insert(std::make_pair(
- VD, MappedVarData(FD, /*IsInTargetMasterThreadRegion=*/true)));
+ Data.insert(
+ std::make_pair(VD, MappedVarData(FD, /*IsInTTDRegion=*/true)));
}
}
if (!NeedToDelayGlobalization) {
/// true if we're emitting the code for the target region and next parallel
/// region is L0 for sure.
bool IsInTargetMasterThreadRegion = false;
+ /// true if currently emitting code for target/teams/distribute region, false
+ /// - otherwise.
+ bool IsInTTDRegion = false;
/// true if we're definitely in the parallel region.
bool IsInParallelRegion = false;
// CK1: [[SHAREDARGS2:%.+]] = alloca i8**
// CK1: call void @__kmpc_kernel_init
// CK1: call void @__kmpc_data_sharing_init_stack
-// CK1: [[GLOBALSTACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 256, i16 0)
+// CK1: [[GLOBALSTACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 0)
// CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to %struct._globalized_locals_ty*
-// CK1: [[A_ARR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0
-// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CK1: [[LID:%.+]] = and i32 [[TID]], 31
-// CK1: [[A:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[A_ARR]], i32 0, i32 [[LID]]
-// CK1: [[B_ARR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1
-// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CK1: [[LID:%.+]] = and i32 [[TID]], 31
-// CK1: [[B:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[B_ARR]], i32 0, i32 [[LID]]
+// CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0
+// CK1: [[B:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1
// CK1: store i32 10, i32* [[A]]
// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1)
// CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS1]], i64 1)
// CHECK-32: [[A_ADDR:%.+]] = alloca i32,
// CHECK-64: [[A_ADDR:%.+]] = alloca i64,
// CHECK-64: [[CONV:%.+]] = bitcast i64* [[A_ADDR]] to i32*
-// CHECK: [[STACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0)
+// CHECK: [[STACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
// CHECK: [[BC:%.+]] = bitcast i8* [[STACK]] to %struct._globalized_locals_ty*
// CHECK-32: [[A:%.+]] = load i32, i32* [[A_ADDR]],
// CHECK-64: [[A:%.+]] = load i32, i32* [[CONV]],
-// CHECK: [[GLOBAL_A_ADDR_ARR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CHECK: [[LID:%.+]] = and i32 [[TID]], 31
-// CHECK: [[GLOBAL_A_ADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[GLOBAL_A_ADDR_ARR]], i32 0, i32 [[LID]]
+// CHECK: [[GLOBAL_A_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CHECK: store i32 [[A]], i32* [[GLOBAL_A_ADDR]],
// CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}})
// CK1: store {{.+}} 0, {{.+}},
// CK1: store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]],
// CK1-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}*
-// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 128, i16 0)
+// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
// CK1-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
// CK1-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
-// CK1: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CK1: [[LID:%.+]] = and i32 [[TID]], 31
-// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]]
+// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK1: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
// CK1: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***,
// CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**,
// CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]]
-// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{128|256}}, i16 0)
+// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
// CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
-// CK1: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CK1: [[LID:%.+]] = and i32 [[TID]], 31
-// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i8**], [32 x i8**]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]]
+// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK1: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
// CK1: store i8*** [[ARGCADDR]], i8**** [[ARGCADDR_PTR]],
// CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{.+}}**, i{{.+}}*** [[ARGCADDR_PTR]],
// CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
// CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
// CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
-// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 128, i16 0)
+// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
// CK2-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
// CK2-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
-// CK2: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CK2: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CK2: [[LID:%.+]] = and i32 [[TID]], 31
-// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]]
+// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK2: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
// CK2: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
// CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
// CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
-// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{128|256}}, i16 0)
+// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
// CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
-// CK2: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CK2: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CK2: [[LID:%.+]] = and i32 [[TID]], 31
-// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i8**], [32 x i8**]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]]
+// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK2: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
// CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]],