/// environment for the given \p FPType if given. Otherwise, the default
/// assumed mode for any floating point type.
virtual llvm::DenormalMode getDefaultDenormalModeForType(
- const llvm::opt::ArgList &DriverArgs,
- Action::OffloadKind DeviceOffloadKind,
+ const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType = nullptr) const {
return llvm::DenormalMode::getIEEE();
}
}
llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
- const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
+ const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType) const {
// Denormals should always be enabled for f16 and f64.
if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
return llvm::DenormalMode::getIEEE();
- if (DeviceOffloadKind == Action::OFK_Cuda) {
+ if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
+ JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
+ auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
options::OPT_fno_cuda_flush_denormals_to_zero,
- false))
+ getDefaultDenormsAreZeroForTarget(Kind)))
return llvm::DenormalMode::getPreserveSign();
+
+ return llvm::DenormalMode::getIEEE();
}
const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
// them all?
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
getDefaultDenormsAreZeroForTarget(Kind);
- // Outputs are flushed to zero, preserving sign
+
+ // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
+ // also implicitly treated as zero (DAZ).
return DAZ ? llvm::DenormalMode::getPreserveSign() :
llvm::DenormalMode::getIEEE();
}
static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind);
llvm::DenormalMode getDefaultDenormalModeForType(
- const llvm::opt::ArgList &DriverArgs,
- Action::OffloadKind DeviceOffloadKind,
+ const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType = nullptr) const override;
};
static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
bool OFastEnabled, const ArgList &Args,
ArgStringList &CmdArgs,
- Action::OffloadKind DeviceOffloadKind) {
+ const JobAction &JA) {
// Handle various floating point optimization flags, mapping them to the
// appropriate LLVM code generation flags. This is complicated by several
// "umbrella" flags, so we do this by stepping through the flags incrementally
// -ffp-exception-behavior options: strict, maytrap, ignore
StringRef FPExceptionBehavior = "";
const llvm::DenormalMode DefaultDenormalFPMath =
- TC.getDefaultDenormalModeForType(Args, DeviceOffloadKind);
+ TC.getDefaultDenormalModeForType(Args, JA);
const llvm::DenormalMode DefaultDenormalFP32Math =
- TC.getDefaultDenormalModeForType(Args, DeviceOffloadKind,
- &llvm::APFloat::IEEEsingle());
+ TC.getDefaultDenormalModeForType(Args, JA, &llvm::APFloat::IEEEsingle());
llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath;
llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math;
CmdArgs.push_back("-mdisable-tail-calls");
RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args,
- CmdArgs, JA.getOffloadingDeviceKind());
+ CmdArgs, JA);
// Render ABI arguments
switch (TC.getArch()) {
if (Args.hasArg(options::OPT_fsplit_stack))
CmdArgs.push_back("-split-stacks");
- RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs,
- JA.getOffloadingDeviceKind());
+ RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA);
if (Arg *A = Args.getLastArg(options::OPT_mdouble_EQ)) {
if (TC.getArch() == llvm::Triple::avr)
}
llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
- const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
+ const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType) const {
- if (DeviceOffloadKind == Action::OFK_Cuda) {
+ if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
options::OPT_fno_cuda_flush_denormals_to_zero,
return llvm::DenormalMode::getPreserveSign();
}
- assert(DeviceOffloadKind != Action::OFK_Host);
+ assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
return llvm::DenormalMode::getIEEE();
}
Action::OffloadKind DeviceOffloadKind) const override;
llvm::DenormalMode getDefaultDenormalModeForType(
- const llvm::opt::ArgList &DriverArgs,
- Action::OffloadKind DeviceOffloadKind,
+ const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType = nullptr) const override;
// Never try to use the integrated assembler with CUDA; always fork out to
ToolChain::addProfileRTLibs(Args, CmdArgs);
}
-llvm::DenormalMode Linux::getDefaultDenormalModeForType(
- const llvm::opt::ArgList &DriverArgs,
- Action::OffloadKind DeviceOffloadKind,
- const llvm::fltSemantics *FPType) const {
+llvm::DenormalMode
+Linux::getDefaultDenormalModeForType(const llvm::opt::ArgList &DriverArgs,
+ const JobAction &JA,
+ const llvm::fltSemantics *FPType) const {
switch (getTriple().getArch()) {
case llvm::Triple::x86:
case llvm::Triple::x86_64: {
std::vector<std::string> ExtraOpts;
llvm::DenormalMode getDefaultDenormalModeForType(
- const llvm::opt::ArgList &DriverArgs,
- Action::OffloadKind DeviceOffloadKind,
- const llvm::fltSemantics *FPType = nullptr) const override;
+ const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
+ const llvm::fltSemantics *FPType = nullptr) const override;
protected:
Tool *buildAssembler() const override;
Action::OffloadKind DeviceOffloadingKind) const override;
llvm::DenormalMode getDefaultDenormalModeForType(
- const llvm::opt::ArgList &DriverArgs,
- Action::OffloadKind DeviceOffloadKind,
- const llvm::fltSemantics *FPType) const override {
+ const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
+ const llvm::fltSemantics *FPType) const override {
// DAZ and FTZ are on by default.
return llvm::DenormalMode::getPreserveSign();
}
// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
-// Test explicit argument.
+// Test explicit argument, with CUDA offload kind
// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+// Test explicit argument, with HIP offload kind
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
-// Test the default changing with no argument based on the subtarget.
+// Test the default changing with no argument based on the subtarget in HIP mode
// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+// Test multiple offload archs with different defaults.
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fcuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZX2 %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fno-cuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+
// CPUFTZ-NOT: -fdenormal-fp-math
// FTZ-NOT: -fdenormal-fp-math-f32=
// The default of ieee is omitted
// NOFTZ-NOT: "-fdenormal-fp-math"
// NOFTZ-NOT: "-fdenormal-fp-math-f32"
+
+// MIXED-DEFAULT-MODE-NOT: -denormal-fp-math
+// MIXED-DEFAULT-MODE: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// MIXED-DEFAULT-MODE-SAME: "-target-cpu" "gfx803"
+// MIXED-DEFAULT-MODE-NOT: -denormal-fp-math
+
+// FTZX2: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// FTZX2-SAME: "-target-cpu" "gfx803"
+// FTZX2: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// FTZX2-SAME: "-target-cpu" "gfx900"