Rename it to -fgpu-flush-denormals-to-zero.
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D99688
HelpText<"Ignore environment variables to detect CUDA installation">;
def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group<i_Group>,
HelpText<"Path to ptxas (used for compiling CUDA code)">;
+def fgpu_flush_denormals_to_zero : Flag<["-"], "fgpu-flush-denormals-to-zero">,
+ HelpText<"Flush denormal floating point values to zero in CUDA/HIP device mode.">;
+def fno_gpu_flush_denormals_to_zero : Flag<["-"], "fno-gpu-flush-denormals-to-zero">;
def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">,
- HelpText<"Flush denormal floating point values to zero in CUDA device mode.">;
-def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">;
+ Alias<fgpu_flush_denormals_to_zero>;
+def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">,
+ Alias<fno_gpu_flush_denormals_to_zero>;
defm gpu_rdc : BoolFOption<"gpu-rdc",
LangOpts<"GPURelocatableDeviceCode">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Generate relocatable device code, also known as separate compilation mode">,
auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
- DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
- options::OPT_fno_cuda_flush_denormals_to_zero,
+ DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
+ options::OPT_fno_gpu_flush_denormals_to_zero,
getDefaultDenormsAreZeroForTarget(Kind)))
return llvm::DenormalMode::getPreserveSign();
const llvm::fltSemantics *FPType) const {
if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
- DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
- options::OPT_fno_cuda_flush_denormals_to_zero,
- false))
+ DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
+ options::OPT_fno_gpu_flush_denormals_to_zero, false))
return llvm::DenormalMode::getPreserveSign();
}
// If --hip-device-lib is not set, add the default bitcode libraries.
// TODO: There are way too many flags that change this. Do we need to check
// them all?
- bool DAZ = DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
- options::OPT_fno_cuda_flush_denormals_to_zero,
+ bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
+ options::OPT_fno_gpu_flush_denormals_to_zero,
getDefaultDenormsAreZeroForTarget(Kind));
// TODO: Check standard C++ flags?
bool FiniteOnly = false;
// Checks that device function calls get emitted with the "denormal-fp-math-f32"
// attribute set when we compile CUDA device code with
// -fdenormal-fp-math-f32. Further, check that we reflect the presence or
-// absence of -fcuda-flush-denormals-to-zero in a module flag.
+// absence of -fgpu-flush-denormals-to-zero in a module flag.
// AMDGCN targets always have f64/f16 denormals enabled.
//
//
// For AMDGCN target with fast FMAF (e.g. gfx900), it has ieee denormals by
// default and preserve-sign when there with the option
-// -fcuda-flush-denormals-to-zero.
+// -fgpu-flush-denormals-to-zero.
// CHECK-LABEL: define void @foo() #0
extern "C" __device__ void foo() {}
// Checks that cuda compilation does the right thing when passed
-// -fcuda-flush-denormals-to-zero. This should be translated to
+// -fgpu-flush-denormals-to-zero. This should be translated to
// -fdenormal-fp-math-f32=preserve-sign
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+// Test alias options -f[no-]cuda-flush-denormals-to-zero
// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// Test explicit argument, with CUDA offload kind
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// Test explicit argument, with HIP offload kind
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// Test the default changing with no argument based on the subtarget in HIP mode
// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
// Test multiple offload archs with different defaults.
// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fcuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZX2 %s
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fno-cuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fgpu-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZX2 %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fno-gpu-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// CPUFTZ-NOT: -fdenormal-fp-math
// Test explicit flag, opposite of target default.
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
-// RUN: -fcuda-flush-denormals-to-zero \
+// RUN: -fgpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD
// Test explicit flag, opposite of target default.
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
-// RUN: -fno-cuda-flush-denormals-to-zero \
+// RUN: -fno-gpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD
// Test explicit flag, same as target default.
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
-// RUN: -fno-cuda-flush-denormals-to-zero \
+// RUN: -fno-gpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD
// Test explicit flag, same as target default.
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
-// RUN: -fcuda-flush-denormals-to-zero \
+// RUN: -fgpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD
// Test last flag wins, not flushing
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
-// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \
+// RUN: -fgpu-flush-denormals-to-zero -fno-gpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
-// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \
+// RUN: -fgpu-flush-denormals-to-zero -fno-gpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
-// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \
+// RUN: -fno-gpu-flush-denormals-to-zero -fgpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
-// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \
+// RUN: -fno-gpu-flush-denormals-to-zero -fgpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD