From: Jacques Pienaar <jpienaar@google.com>
Date: Tue, 24 Feb 2015 21:45:33 +0000 (+0000)
Subject: CUDA: Add option to allow host device functions to call host functions
X-Git-Tag: llvmorg-3.7.0-rc1~11011
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a50178c23ed2cb3f0f230ebb7670f47a5cc37bfd;p=platform%2Fupstream%2Fllvm.git

CUDA: Add option to allow host device functions to call host functions

Commiting code from review http://reviews.llvm.org/D7841

llvm-svn: 230385
---

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c402e24..59101f0 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6067,6 +6067,9 @@ def err_global_call_not_config : Error<
 def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "function %1 in %select{__device__|__global__|__host__|__host__ __device__}2 function">;
+def warn_host_calls_from_host_device : Warning<
+  "calling __host__ function %0 from __host__ __device__ function %1 can lead to runtime errors">,
+  InGroup<CudaCompat>;
 
 def warn_non_pod_vararg_with_format_string : Warning<
   "cannot pass %select{non-POD|non-trivial}0 object of type %1 to variadic "
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 48e3c73..fb39887 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -160,6 +160,7 @@ LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
 LANGOPT(CUDA              , 1, 0, "CUDA")
 LANGOPT(OpenMP            , 1, 0, "OpenMP support")
 LANGOPT(CUDAIsDevice      , 1, 0, "Compiling for CUDA device")
+LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions")
 
 LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
 LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index f1c30c4..f60fb4e 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -608,6 +608,9 @@ def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">,
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">;
+def fcuda_allow_host_calls_from_host_device : Flag<["-"],
+    "fcuda-allow-host-calls-from-host-device">,
+  HelpText<"Allow host device functions to call host functions">;
 
 } // let Flags = [CC1Option]
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index bfd9429..ef76cbf 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -253,7 +253,7 @@ static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args,
     for (unsigned i = 0, e = checkers.size(); i != e; ++i)
       Opts.CheckersControlList.push_back(std::make_pair(checkers[i], enable));
   }
-  
+
   // Go through the analyzer configuration options.
   for (arg_iterator it = Args.filtered_begin(OPT_analyzer_config),
        ie = Args.filtered_end(); it != ie; ++it) {
@@ -1393,6 +1393,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   if (Args.hasArg(OPT_fcuda_is_device))
     Opts.CUDAIsDevice = 1;
 
+  if (Args.hasArg(OPT_fcuda_allow_host_calls_from_host_device))
+    Opts.CUDAAllowHostCallsFromHostDevice = 1;
+
   if (Opts.ObjC1) {
     if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
       StringRef value = arg->getValue();
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 64222fb..6033821 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -92,9 +92,21 @@ bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
     if (Caller->isImplicit()) return false;
 
     bool InDeviceMode = getLangOpts().CUDAIsDevice;
-    if ((InDeviceMode && CalleeTarget != CFT_Device) ||
-        (!InDeviceMode && CalleeTarget != CFT_Host))
+    if (!InDeviceMode && CalleeTarget != CFT_Host)
+        return true;
+    if (InDeviceMode && CalleeTarget != CFT_Device) {
+      // Allow host device functions to call host functions if explicitly
+      // requested.
+      if (CalleeTarget == CFT_Host &&
+          getLangOpts().CUDAAllowHostCallsFromHostDevice) {
+        Diag(Caller->getLocation(),
+             diag::warn_host_calls_from_host_device)
+            << Callee->getNameAsString() << Caller->getNameAsString();
+        return false;
+      }
+
       return true;
+    }
   }
 
   return false;
diff --git a/clang/test/CodeGenCUDA/host-device-calls-host.cu b/clang/test/CodeGenCUDA/host-device-calls-host.cu
new file mode 100644
index 0000000..8140f61
--- /dev/null
+++ b/clang/test/CodeGenCUDA/host-device-calls-host.cu
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-allow-host-calls-from-host-device -fcuda-is-device -Wno-cuda-compat -emit-llvm -o - | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+extern "C"
+void host_function() {}
+
+// CHECK-LABEL: define void @hd_function_a
+extern "C"
+__host__ __device__ void hd_function_a() {
+  // CHECK: call void @host_function
+  host_function();
+}
+
+// CHECK: declare void @host_function
+
+// CHECK-LABEL: define void @hd_function_b
+extern "C"
+__host__ __device__ void hd_function_b(bool b) { if (b) host_function(); }
+
+// CHECK-LABEL: define void @device_function_b
+extern "C"
+__device__ void device_function_b() { hd_function_b(false); }
+
+// CHECK-LABEL: define void @global_function
+extern "C"
+__global__ void global_function() {
+  // CHECK: call void @device_function_b
+  device_function_b();
+}
+
+// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
diff --git a/clang/test/SemaCUDA/function-target-hd.cu b/clang/test/SemaCUDA/function-target-hd.cu
new file mode 100644
index 0000000..25fcc6e
--- /dev/null
+++ b/clang/test/SemaCUDA/function-target-hd.cu
@@ -0,0 +1,71 @@
+// Test the Sema analysis of caller-callee relationships of host device
+// functions when compiling CUDA code. There are 4 permutations of this test as
+// host and device compilation are separate compilation passes, and clang has
+// an option to allow host calls from host device functions. __CUDA_ARCH__ is
+// defined when compiling for the device and TEST_WARN_HD when host calls are
+// allowed from host device functions. So for example, if __CUDA_ARCH__ is
+// defined and TEST_WARN_HD is not then device compilation is happening but
+// host device functions are not allowed to call device functions.
+
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
+
+#include "Inputs/cuda.h"
+
+__host__ void hd1h(void);
+#if defined(__CUDA_ARCH__) && !defined(TEST_WARN_HD)
+// expected-note@-2 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
+#endif
+__device__ void hd1d(void);
+#ifndef __CUDA_ARCH__
+// expected-note@-2 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+#endif
+__host__ void hd1hg(void);
+__device__ void hd1dg(void);
+#ifdef __CUDA_ARCH__
+__host__ void hd1hig(void);
+#if !defined(TEST_WARN_HD)
+// expected-note@-2 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
+#endif
+#else
+__device__ void hd1dig(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+#endif
+__host__ __device__ void hd1hd(void);
+__global__ void hd1g(void); // expected-note {{'hd1g' declared here}}
+
+__host__ __device__ void hd1(void) {
+#if defined(TEST_WARN_HD) && defined(__CUDA_ARCH__)
+// expected-warning@-2 {{calling __host__ function hd1h from __host__ __device__ function hd1}}
+// expected-warning@-3 {{calling __host__ function hd1hig from __host__ __device__ function hd1}}
+#endif
+  hd1d();
+#ifndef __CUDA_ARCH__
+// expected-error@-2 {{no matching function}}
+#endif
+  hd1h();
+#if defined(__CUDA_ARCH__) && !defined(TEST_WARN_HD)
+// expected-error@-2 {{no matching function}}
+#endif
+
+  // No errors as guarded
+#ifdef __CUDA_ARCH__
+  hd1d();
+#else
+  hd1h();
+#endif
+
+  // Errors as incorrectly guarded
+#ifndef __CUDA_ARCH__
+  hd1dig(); // expected-error {{no matching function}}
+#else
+  hd1hig();
+#ifndef TEST_WARN_HD
+// expected-error@-2 {{no matching function}}
+#endif
+#endif
+
+  hd1hd();
+  hd1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'hd1g' in __host__ __device__ function}}
+}
diff --git a/clang/test/SemaCUDA/function-target.cu b/clang/test/SemaCUDA/function-target.cu
index ca56030..83dce50 100644
--- a/clang/test/SemaCUDA/function-target.cu
+++ b/clang/test/SemaCUDA/function-target.cu
@@ -31,41 +31,3 @@ __device__ void d1(void) {
   d1hd();
   d1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'd1g' in __device__ function}}
 }
-
-// Expected 0-1 as in one of host/device side compilation it is an error, while
-// not in the other
-__host__ void hd1h(void); // expected-note 0-1 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
-__device__ void hd1d(void); // expected-note 0-1 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
-__host__ void hd1hg(void);
-__device__ void hd1dg(void);
-#ifdef __CUDA_ARCH__
-__host__ void hd1hig(void); // expected-note {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
-#else
-__device__ void hd1dig(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
-#endif
-__host__ __device__ void hd1hd(void);
-__global__ void hd1g(void); // expected-note {{'hd1g' declared here}}
-
-__host__ __device__ void hd1(void) {
-  // Expected 0-1 as in one of host/device side compilation it is an error,
-  // while not in the other
-  hd1d(); // expected-error 0-1 {{no matching function}}
-  hd1h(); // expected-error 0-1 {{no matching function}}
-
-  // No errors as guarded
-#ifdef __CUDA_ARCH__
-  hd1d();
-#else
-  hd1h();
-#endif
-
-  // Errors as incorrectly guarded
-#ifndef __CUDA_ARCH__
-  hd1dig(); // expected-error {{no matching function}}
-#else
-  hd1hig(); // expected-error {{no matching function}}
-#endif
-
-  hd1hd();
-  hd1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'hd1g' in __host__ __device__ function}}
-}