LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
LANGOPT(OpenMPNoThreadState , 1, 0, "Assume that no thread in a parallel region will modify an ICV.")
+LANGOPT(OpenMPOffloadMandatory , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.")
LANGOPT(RenderScript , 1, 0, "RenderScript")
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>,
HelpText<"Assert no thread in a parallel region modifies an ICV">,
MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
+def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<f_Group>,
+ Flags<[CC1Option, NoArgumentUnused]>,
+ HelpText<"Do not create a host fallback if offloading to the device fails.">,
+ MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
LangOpts<"OpenMPTargetNewRuntime">, DefaultTrue,
PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,
// mangled name of the function that encloses the target region and BB is the
// line number of the target region.
+ const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
+ !CGM.getLangOpts().OpenMPOffloadMandatory;
unsigned DeviceID;
unsigned FileID;
unsigned Line;
CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
+ if (BuildOutlinedFn)
+ OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
// If this target outline function is not an offload entry, we don't need to
// register it.
llvm::Constant::getNullValue(CGM.Int8Ty), Name);
}
+ // If we do not allow host fallback we still need a named address to use.
+ llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
+ if (!BuildOutlinedFn) {
+ assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
+ "Named kernel already exists?");
+ TargetRegionEntryAddr = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
+ }
+
// Register the information for the entry associated with this target region.
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
- DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
+ DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
// Add NumTeams and ThreadLimit attributes to the outlined GPU function
int32_t DefaultValTeams = -1;
getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
- if (DefaultValTeams > 0) {
+ if (DefaultValTeams > 0 && OutlinedFn) {
OutlinedFn->addFnAttr("omp_target_num_teams",
std::to_string(DefaultValTeams));
}
int32_t DefaultValThreads = -1;
getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
- if (DefaultValThreads > 0) {
+ if (DefaultValThreads > 0 && OutlinedFn) {
OutlinedFn->addFnAttr("omp_target_thread_limit",
std::to_string(DefaultValThreads));
}
- CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
+ if (BuildOutlinedFn)
+ CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
/// Checks if the expression is constant or does not have non-trivial function
if (!CGF.HaveInsertPoint())
return;
- assert(OutlinedFn && "Invalid outlined function!");
+ const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
+ CGM.getLangOpts().OpenMPOffloadMandatory;
+
+ assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
D.hasClausesOfKind<OMPNowaitClause>();
CodeGenFunction::OMPTargetDataInfo InputInfo;
llvm::Value *MapTypesArray = nullptr;
llvm::Value *MapNamesArray = nullptr;
- // Fill up the pointer arrays and transfer execution to the device.
- auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
- &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
- &CapturedVars,
- SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
- if (Device.getInt() == OMPC_DEVICE_ancestor) {
- // Reverse offloading is not supported, so just execute on the host.
+ // Generate code for the host fallback function.
+ auto &&FallbackGen = [this, OutlinedFn, OutlinedFnID, &D, &CapturedVars,
+ RequiresOuterTask, &CS,
+ OffloadingMandatory](CodeGenFunction &CGF) {
+ if (OffloadingMandatory) {
+ CGF.Builder.CreateUnreachable();
+ } else {
if (RequiresOuterTask) {
CapturedVars.clear();
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
}
emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
+ }
+ };
+ // Fill up the pointer arrays and transfer execution to the device.
+ auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
+ &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
+ &CapturedVars, SizeEmitter,
+ FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
+ if (Device.getInt() == OMPC_DEVICE_ancestor) {
+ // Reverse offloading is not supported, so just execute on the host.
+ FallbackGen(CGF);
return;
}
CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
CGF.EmitBlock(OffloadFailedBlock);
- if (RequiresOuterTask) {
- CapturedVars.clear();
- CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
- }
- emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
+ FallbackGen(CGF);
+
CGF.EmitBranch(OffloadContBlock);
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
};
// Notify that the host version must be executed.
- auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
- RequiresOuterTask](CodeGenFunction &CGF,
- PrePostActionTy &) {
- if (RequiresOuterTask) {
- CapturedVars.clear();
- CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
- }
- emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
+ auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, RequiresOuterTask,
+ FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
+ FallbackGen(CGF);
};
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
if (CGM.getLangOpts().OMPTargetTriples.empty())
IsOffloadEntry = false;
+ if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
+ unsigned DiagID = CGM.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error,
+ "No offloading entry generated while offloading is mandatory.");
+ CGM.getDiags().Report(DiagID);
+ }
+
assert(CGF.CurFuncDecl && "No parent declaration for target region!");
StringRef ParentName;
// In case we have Ctors/Dtors we use the complete type variant to produce
CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
+ if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
+ CmdArgs.push_back("-fopenmp-offload-mandatory");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
<< HostDevTy;
return;
}
- if (!LangOpts.OpenMPIsDevice && DevTy &&
+ if (!LangOpts.OpenMPIsDevice && !LangOpts.OpenMPOffloadMandatory && DevTy &&
*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) {
// Diagnose nohost function called during host codegen.
StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName(
--- /dev/null
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+"
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-offload-mandatory -emit-llvm %s -o - | FileCheck %s --check-prefix=MANDATORY
+// expected-no-diagnostics
+
+// Ordinary declare-target function: the MANDATORY checks below show it still
+// receives a host definition (@_Z3foov), i.e. offload-mandatory does not strip
+// host versions of declare-target functions themselves.
+void foo() {}
+#pragma omp declare target(foo)
+
+// device_type(nohost) declare-target function. Calling it from target regions
+// during host codegen would normally be diagnosed; presumably the
+// -fopenmp-offload-mandatory check added in Sema suppresses that diagnostic
+// here (expected-no-diagnostics above) — confirm against the Sema hunk.
+void bar() {}
+#pragma omp declare target device_type(nohost) to(bar)
+
+// Plain target region: with offload mandatory, the MANDATORY checks verify
+// that the omp_offload.failed block is 'unreachable' — no host fallback
+// function is generated or called.
+void host() {
+#pragma omp target
+  { bar(); }
+}
+
+// Target region guarded by an if() clause: both the offload-failure path and
+// the omp_if.else (device-condition-false) path must lower to 'unreachable'
+// when offloading is mandatory, per the MANDATORY checks below.
+void host_if(bool cond) {
+#pragma omp target if(cond)
+  { bar(); }
+}
+
+// Target region with an explicit device() clause: the device id is passed
+// through to __tgt_target_mapper, and the failure path is again 'unreachable'
+// instead of a host fallback call.
+void host_dev(int device) {
+#pragma omp target device(device)
+  { bar(); }
+}
+// MANDATORY-LABEL: define {{[^@]+}}@_Z3foov
+// MANDATORY-SAME: () #[[ATTR0:[0-9]+]] {
+// MANDATORY-NEXT: entry:
+// MANDATORY-NEXT: ret void
+//
+//
+// MANDATORY-LABEL: define {{[^@]+}}@_Z4hostv
+// MANDATORY-SAME: () #[[ATTR0]] {
+// MANDATORY-NEXT: entry:
+// MANDATORY-NEXT: [[TMP0:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4hostv_l12.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
+// MANDATORY-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
+// MANDATORY-NEXT: br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// MANDATORY: omp_offload.failed:
+// MANDATORY-NEXT: unreachable
+// MANDATORY: omp_offload.cont:
+// MANDATORY-NEXT: ret void
+//
+//
+// MANDATORY-LABEL: define {{[^@]+}}@_Z7host_ifb
+// MANDATORY-SAME: (i1 noundef zeroext [[COND:%.*]]) #[[ATTR0]] {
+// MANDATORY-NEXT: entry:
+// MANDATORY-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1
+// MANDATORY-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND]] to i8
+// MANDATORY-NEXT: store i8 [[FROMBOOL]], i8* [[COND_ADDR]], align 1
+// MANDATORY-NEXT: [[TMP0:%.*]] = load i8, i8* [[COND_ADDR]], align 1
+// MANDATORY-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
+// MANDATORY-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// MANDATORY: omp_if.then:
+// MANDATORY-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7host_ifb_l17.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
+// MANDATORY-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+// MANDATORY-NEXT: br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// MANDATORY: omp_offload.failed:
+// MANDATORY-NEXT: unreachable
+// MANDATORY: omp_offload.cont:
+// MANDATORY-NEXT: br label [[OMP_IF_END:%.*]]
+// MANDATORY: omp_if.else:
+// MANDATORY-NEXT: unreachable
+// MANDATORY: omp_if.end:
+// MANDATORY-NEXT: ret void
+//
+//
+// MANDATORY-LABEL: define {{[^@]+}}@_Z8host_devi
+// MANDATORY-SAME: (i32 noundef signext [[DEVICE:%.*]]) #[[ATTR0]] {
+// MANDATORY-NEXT: entry:
+// MANDATORY-NEXT: [[DEVICE_ADDR:%.*]] = alloca i32, align 4
+// MANDATORY-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// MANDATORY-NEXT: store i32 [[DEVICE]], i32* [[DEVICE_ADDR]], align 4
+// MANDATORY-NEXT: [[TMP0:%.*]] = load i32, i32* [[DEVICE_ADDR]], align 4
+// MANDATORY-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// MANDATORY-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// MANDATORY-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
+// MANDATORY-NEXT: [[TMP3:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 [[TMP2]], i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8host_devi_l22.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
+// MANDATORY-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+// MANDATORY-NEXT: br i1 [[TMP4]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// MANDATORY: omp_offload.failed:
+// MANDATORY-NEXT: unreachable
+// MANDATORY: omp_offload.cont:
+// MANDATORY-NEXT: ret void
+//
+//
+// MANDATORY-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// MANDATORY-SAME: () #[[ATTR3:[0-9]+]] {
+// MANDATORY-NEXT: entry:
+// MANDATORY-NEXT: call void @__tgt_register_requires(i64 1)
+// MANDATORY-NEXT: ret void
+//