[OpenMP] Overload `std::isnan` and friends multiple times for the GPU

author Johannes Doerfert <johannes@jdoerfert.de>

Wed, 12 Aug 2020 21:45:46 +0000 (16:45 -0500)

committer Johannes Doerfert <johannes@jdoerfert.de>

Wed, 16 Sep 2020 18:37:09 +0000 (13:37 -0500)
author Johannes Doerfert <johannes@jdoerfert.de>
Wed, 12 Aug 2020 21:45:46 +0000 (16:45 -0500)
committer Johannes Doerfert <johannes@jdoerfert.de>
Wed, 16 Sep 2020 18:37:09 +0000 (13:37 -0500)
diff --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h

index 8ba1826..f49463d 100644 (file)
--- a/clang/lib/Headers/__clang_cuda_cmath.h
+++ b/clang/lib/Headers/__clang_cuda_cmath.h
@@ -66,10 +66,38 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
  }
  
  // For inscrutable reasons, the CUDA headers define these functions for us on
-// Windows. For OpenMP we omit these as some old system headers have
-// non-conforming `isinf(float)` and `isnan(float)` implementations that return
-// an `int`. The system versions of these functions should be fine anyway.
-#if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__)
+// Windows.
+#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)
+
+// For OpenMP we work around some old system headers that have non-conforming
+// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
+// this by providing two versions of these functions, differing only in the
+// return type. To avoid conflicting definitions we disable implicit base
+// function generation. That means we will end up with two specializations, one
+// per type, but only one has a base function defined by the system header.
+#if defined(__OPENMP_NVPTX__)
+#pragma omp begin declare variant match(                                       \
+    implementation = {extension(disable_implicit_base)})
+
+// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
+//        add a suffix. This means we would clash with the names of the variants
+//        (note that we do not create implicit base functions here). To avoid
+//        this clash we add a new trait to some of them that is always true
+//        (this is LLVM after all ;)). It will only influence the mangled name
+//        of the variants inside the inner region and avoid the clash.
+#pragma omp begin declare variant match(implementation = {vendor(llvm)})
+
+__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
+__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+
+#pragma omp end declare variant
+
+#endif
+
  __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
  __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
  __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
@@ -79,6 +107,11 @@ __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
  __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
  __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
  __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+
+#if defined(__OPENMP_NVPTX__)
+#pragma omp end declare variant
+#endif
+
  #endif
  
  __DEVICE__ bool isgreater(float __x, float __y) {
diff --git a/clang/test/Headers/Inputs/include/cmath b/clang/test/Headers/Inputs/include/cmath

index 5e4e8b6..20e3489 100644 (file)
--- a/clang/test/Headers/Inputs/include/cmath
+++ b/clang/test/Headers/Inputs/include/cmath
@@ -82,8 +82,13 @@ bool isless(float, float);
  bool islessgreater(double, double);
  bool islessgreater(float, float);
  bool isnan(long double);
+#ifdef USE_ISNAN_WITH_INT_RETURN
+int isnan(double);
+int isnan(float);
+#else
  bool isnan(double);
  bool isnan(float);
+#endif
  bool isnormal(double);
  bool isnormal(float);
  bool isunordered(double, double);
diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp

new file mode 100644 (file)

index 0000000..35443db
--- /dev/null
+++ b/clang/test/Headers/openmp_device_math_isnan.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// expected-no-diagnostics
+
+#include <cmath>
+
+double math(float f, double d) {
+  double r = 0;
+  // INT_RETURN: call i32 @__nv_isnanf(float
+  // BOOL_RETURN: call i32 @__nv_isnanf(float
+  r += std::isnan(f);
+  // INT_RETURN: call i32 @__nv_isnand(double
+  // BOOL_RETURN: call i32 @__nv_isnand(double
+  r += std::isnan(d);
+  return r;
+}
+
+long double foo(float f, double d, long double ld) {
+  double r = ld;
+  r += math(f, d);
+#pragma omp target map(r)
+  { r += math(f, d); }
+  return r;
+}
author	Johannes Doerfert <johannes@jdoerfert.de>
	Wed, 12 Aug 2020 21:45:46 +0000 (16:45 -0500)
committer	Johannes Doerfert <johannes@jdoerfert.de>
	Wed, 16 Sep 2020 18:37:09 +0000 (13:37 -0500)
clang/lib/Headers/__clang_cuda_cmath.h		patch \| blob \| history
clang/test/Headers/Inputs/include/cmath		patch \| blob \| history
clang/test/Headers/openmp_device_math_isnan.cpp	[new file with mode: 0644]	patch \| blob