bool IsTargetTask =
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
isOpenMPTargetExecutionDirective(D.getDirectiveKind());
- // For target-based directives skip 3 firstprivate arrays BasePointersArray,
- // PointersArray and SizesArray. The original variables for these arrays are
- // not captured and we get their addresses explicitly.
+ // For target-based directives, skip the 4 firstprivate arrays
+ // BasePointersArray, PointersArray, SizesArray, and MappersArray. The
+ // original variables for these arrays are not captured and we get their
+ // addresses explicitly.
if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
(IsTargetTask && KmpTaskSharedsPtr.isValid())) {
SrcBase = CGF.MakeAddrLValue(
if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
const VarDecl *OriginalVD = Pair.second.Original;
// Check if the variable is the target-based BasePointersArray,
- // PointersArray or SizesArray.
+ // PointersArray, SizesArray, or MappersArray.
LValue SharedRefLValue;
QualType Type = PrivateLValue.getType();
const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
}
}
+namespace {
+/// Additional options for the emitOffloadingArraysArgument function.
+struct ArgumentsOptions {
+ bool ForEndCall = false;
+ bool IsTask = false;
+ ArgumentsOptions() = default;
+ ArgumentsOptions(bool ForEndCall, bool IsTask)
+ : ForEndCall(ForEndCall), IsTask(IsTask) {}
+};
+} // namespace
+
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg,
- CGOpenMPRuntime::TargetDataInfo &Info, bool ForEndCall = false) {
- assert((!ForEndCall || Info.separateBeginEndCalls()) &&
+ CGOpenMPRuntime::TargetDataInfo &Info,
+ const ArgumentsOptions &Options = ArgumentsOptions()) {
+ assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
"expected region end call to runtime only when end call is separate");
CodeGenModule &CGM = CGF.CGM;
if (Info.NumberOfPtrs) {
/*Idx0=*/0, /*Idx1=*/0);
MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
- ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
- : Info.MapTypesArray,
+ Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
+ : Info.MapTypesArray,
/*Idx0=*/0,
/*Idx1=*/0);
- MappersArrayArg =
- Info.HasMapper
- ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy)
- : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
+ // For a target task, always emit the mapper array address so that it can
+ // be privatized, even if no user-defined mapper is present.
+ if (!Options.IsTask && !Info.HasMapper)
+ MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
+ else
+ MappersArrayArg =
+ CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
} else {
BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
TargetDataInfo Info;
// Fill up the arrays and create the arguments.
emitOffloadingArrays(CGF, CombinedInfo, Info);
+ bool HasDependClauses = D.hasClausesOfKind<OMPDependClause>();
emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
- Info.MapTypesArray, Info.MappersArray, Info);
+ Info.MapTypesArray, Info.MappersArray, Info,
+                               {/*ForEndCall=*/false, HasDependClauses});
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray =
Address(Info.BasePointersArray, CGM.getPointerAlign());
llvm::Value *MappersArrayArg = nullptr;
emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
SizesArrayArg, MapTypesArrayArg,
- MappersArrayArg, Info, /*ForEndCall=*/false);
+ MappersArrayArg, Info);
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
llvm::Value *MappersArrayArg = nullptr;
emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
SizesArrayArg, MapTypesArrayArg,
- MappersArrayArg, Info, /*ForEndCall=*/true);
+ MappersArrayArg, Info,
+ {/*ForEndCall=*/true, /*IsTask=*/false});
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
TargetDataInfo Info;
// Fill up the arrays and create the arguments.
emitOffloadingArrays(CGF, CombinedInfo, Info);
+ bool HasDependClauses = D.hasClausesOfKind<OMPDependClause>();
emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
- Info.MapTypesArray, Info.MappersArray, Info);
+ Info.MapTypesArray, Info.MappersArray, Info,
+                               {/*ForEndCall=*/false, HasDependClauses});
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray =
Address(Info.BasePointersArray, CGM.getPointerAlign());
Address(Info.SizesArray, CGM.getPointerAlign());
InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
MapTypesArray = Info.MapTypesArray;
- if (D.hasClausesOfKind<OMPDependClause>())
+ if (HasDependClauses)
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
else
emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
-// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i64] [i64 0, i64 4]
-// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 544, i64 800]
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 {{16|12}}]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 544, i64 800, i64 3]
// CHECK-DAG: @{{.*}} = weak constant i8 0
// TCHECK: @{{.+}} = weak constant [[ENTTY]]
ty Y;
};
+#pragma omp declare mapper(id \
+ : TT <long long, char> \
+ s) map(s.X, s.Y)
int global;
extern int global;
// CHECK: [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
// CHECK: br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
// CHECK: [[THEN]]:
- // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
- // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+ // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
+ // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
+ // CHECK-DAG: [[MADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[M:%.+]], i[[SZ]] 0, i[[SZ]] 0
// CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
// CHECK-DAG: store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
// CHECK-DAG: store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+ // CHECK-DAG: store i8* null, i8** [[MADDR0]],
- // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
- // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+ // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1
+ // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1
+ // CHECK-DAG: [[MADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[M]], i[[SZ]] 0, i[[SZ]] 1
// CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
// CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
// CHECK-DAG: store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
// CHECK-DAG: store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
- // CHECK-DAG: getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
- // CHECK-DAG: getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+ // CHECK-DAG: store i8* null, i8** [[MADDR1]],
+
+ // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2
+ // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2
+ // CHECK-DAG: [[MADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[M]], i[[SZ]] 0, i[[SZ]] 2
+ // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to [[STRUCT_TT:%.+]]**
+ // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to [[STRUCT_TT]]**
+ // CHECK-DAG: store [[STRUCT_TT]]* [[D_ADDR:%.+]], [[STRUCT_TT]]** [[CBPADDR2]]
+ // CHECK-DAG: store [[STRUCT_TT]]* [[D_ADDR]], [[STRUCT_TT]]** [[CPADDR2]]
+ // CHECK-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MAPPER_ID:@.+]] to i8*), i8** [[MADDR2]],
+
+ // CHECK-DAG: [[BP_START:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0
+ // CHECK-DAG: [[P_START:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0
+ // CHECK-DAG: [[M_START:%.+]] = bitcast [3 x i8*]* [[M]] to i8**
// CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
// CHECK: store i32 [[DEV]], i32* [[GEP]],
// CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
// CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
- // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{.*}}, i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+ // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{.*}}, i32 [[GTID]], i32 1, i[[SZ]] {{152|88}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+ // CHECK: [[BASE:%.+]] = getelementptr inbounds [[TASK_TY1_]], [[TASK_TY1_]]* [[BC_TASK]], i32 0, i32 1
+ // CHECK-64: [[BP_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY:%.+]], [[PRIVS_TY:%.+]]* [[BASE]], i32 0, i32 1
+ // CHECK-64: [[BP_CAST:%.+]] = bitcast [3 x i8*]* [[BP_BASE]] to i8*
+ // CHECK-64: [[BP_SRC:%.+]] = bitcast i8** [[BP_START]] to i8*
+ // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[BP_CAST]], i8* align 8 [[BP_SRC]], i64 24, i1 false)
+ // CHECK-64: [[P_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 2
+ // CHECK-64: [[P_CAST:%.+]] = bitcast [3 x i8*]* [[P_BASE]] to i8*
+ // CHECK-64: [[P_SRC:%.+]] = bitcast i8** [[P_START]] to i8*
+ // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[P_CAST]], i8* align 8 [[P_SRC]], i64 24, i1 false)
+ // CHECK-64: [[SZ_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 3
+ // CHECK-64: [[SZ_CAST:%.+]] = bitcast [3 x i64]* [[SZ_BASE]] to i8*
+ // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SZ_CAST]], i8* align 8 bitcast ([3 x i64]* [[SIZET]] to i8*), i64 24, i1 false)
+ // CHECK-64: [[M_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 4
+ // CHECK-64: [[M_CAST:%.+]] = bitcast [3 x i8*]* [[M_BASE]] to i8*
+ // CHECK-64: [[M_SRC:%.+]] = bitcast i8** [[M_START]] to i8*
+ // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[M_CAST]], i8* align 8 [[M_SRC]], i64 24, i1 false)
+ // CHECK-32: [[SZ_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY:%.+]], [[PRIVS_TY:%.+]]* [[BASE]], i32 0, i32 0
+ // CHECK-32: [[SZ_CAST:%.+]] = bitcast [3 x i64]* [[SZ_BASE]] to i8*
+ // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[SZ_CAST]], i8* align 4 bitcast ([3 x i64]* [[SIZET]] to i8*), i32 24, i1 false)
+ // CHECK-32: [[BP_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 3
+ // CHECK-32: [[BP_CAST:%.+]] = bitcast [3 x i8*]* [[BP_BASE]] to i8*
+ // CHECK-32: [[BP_SRC:%.+]] = bitcast i8** [[BP_START]] to i8*
+ // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[BP_CAST]], i8* align 4 [[BP_SRC]], i32 12, i1 false)
+ // CHECK-32: [[P_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 4
+ // CHECK-32: [[P_CAST:%.+]] = bitcast [3 x i8*]* [[P_BASE]] to i8*
+ // CHECK-32: [[P_SRC:%.+]] = bitcast i8** [[P_START]] to i8*
+ // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[P_CAST]], i8* align 4 [[P_SRC]], i32 12, i1 false)
+ // CHECK-32: [[M_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 5
+ // CHECK-32: [[M_CAST:%.+]] = bitcast [3 x i8*]* [[M_BASE]] to i8*
+ // CHECK-32: [[M_SRC:%.+]] = bitcast i8** [[M_START]] to i8*
+ // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[M_CAST]], i8* align 4 [[M_SRC]], i32 12, i1 false)
// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START:%.+]], i[[SZ]] 1
// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START]], i[[SZ]] 2
// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
// CHECK: br label %[[EXIT:.+]]
// CHECK: [[EXIT]]:
-#pragma omp target device(global + a) nowait depend(inout \
- : global, a, bn) if (a)
+#pragma omp target device(global + a) nowait depend(inout \
+ : global, a, bn) if (a) map(mapper(id), tofrom \
+ : d)
{
static int local1;
*plocal = global;
// CHECK: define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
-// CHECK: define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias %1)
-// CHECK: call void (i8*, ...) %
-// CHECK: [[SZT:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK: define internal void [[MAPPER_ID]](i8* %{{.+}}, i8* %{{.+}}, i8* %{{.+}}, i64 %{{.+}}, i64 %{{.+}})
+
+// CHECK: define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias %{{.+}})
+// CHECK: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, i[[SZ]]*** %{{.+}}, i32** %{{.+}}, [3 x i8*]** [[BPTR_ADDR:%.+]], [3 x i8*]** [[PTR_ADDR:%.+]], [3 x i64]** [[SZ_ADDR:%.+]], [3 x i8*]** [[M_ADDR:%.+]])
+// CHECK: [[BPTR_REF:%.+]] = load [3 x i8*]*, [3 x i8*]** [[BPTR_ADDR]],
+// CHECK: [[PTR_REF:%.+]] = load [3 x i8*]*, [3 x i8*]** [[PTR_ADDR]],
+// CHECK: [[SZ_REF:%.+]] = load [3 x i64]*, [3 x i64]** [[SZ_ADDR]],
+// CHECK: [[M_REF:%.+]] = load [3 x i8*]*, [3 x i8*]** [[M_ADDR]],
+// CHECK: [[BPR:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPTR_REF]], i[[SZ]] 0, i[[SZ]] 0
+// CHECK: [[PR:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_REF]], i[[SZ]] 0, i[[SZ]] 0
+// CHECK: [[SZT:%.+]] = getelementptr inbounds [3 x i64], [3 x i64]* [[SZ_REF]], i[[SZ]] 0, i[[SZ]] 0
+// CHECK: [[M:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[M_REF]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
// CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]])
+// CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 3, i8** [[BPR]], i8** [[PR]], i64* [[SZT]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M]])
// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]