CUDA host device code with two code paths

author Reid Kleckner <reid@kleckner.net>

Wed, 3 Dec 2014 21:53:36 +0000 (21:53 +0000)

committer Reid Kleckner <reid@kleckner.net>

Wed, 3 Dec 2014 21:53:36 +0000 (21:53 +0000)
author Reid Kleckner <reid@kleckner.net>
Wed, 3 Dec 2014 21:53:36 +0000 (21:53 +0000)
committer Reid Kleckner <reid@kleckner.net>
Wed, 3 Dec 2014 21:53:36 +0000 (21:53 +0000)
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def

index 2c02d2a..cf9638d 100644 (file)
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -157,6 +157,7 @@ LANGOPT(NativeHalfType    , 1, 0, "Native half type support")
  LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
  LANGOPT(CUDA              , 1, 0, "CUDA")
  LANGOPT(OpenMP            , 1, 0, "OpenMP support")
+LANGOPT(CUDAIsDevice      , 1, 0, "Compiling for CUDA device")
  
  LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
  LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp

index ae6a678..b28808b 100644 (file)
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -1377,6 +1377,16 @@ namespace {
    class NVPTXTargetInfo : public TargetInfo {
      static const char * const GCCRegNames[];
      static const Builtin::Info BuiltinInfo[];
+
+  // The GPU profiles supported by the NVPTX backend
+  enum GPUKind {
+    GK_NONE,
+    GK_SM20,
+    GK_SM21,
+    GK_SM30,
+    GK_SM35,
+  } GPU;
+
    public:
      NVPTXTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
        BigEndian = false;
@@ -1387,11 +1397,34 @@ namespace {
        // Define available target features
        // These must be defined in sorted order!
        NoAsmVariants = true;
+      // Set the default GPU to sm20
+      GPU = GK_SM20;
      }
      void getTargetDefines(const LangOptions &Opts,
                            MacroBuilder &Builder) const override {
        Builder.defineMacro("__PTX__");
        Builder.defineMacro("__NVPTX__");
+      if (Opts.CUDAIsDevice) {
+        // Set __CUDA_ARCH__ for the GPU specified.
+        std::string CUDAArchCode;
+        switch (GPU) {
+        case GK_SM20:
+          CUDAArchCode = "200";
+          break;
+        case GK_SM21:
+          CUDAArchCode = "210";
+          break;
+        case GK_SM30:
+          CUDAArchCode = "300";
+          break;
+        case GK_SM35:
+          CUDAArchCode = "350";
+          break;
+        default:
+          llvm_unreachable("Unhandled target CPU");
+        }
+        Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
+      }
      }
      void getTargetBuiltins(const Builtin::Info *&Records,
                             unsigned &NumRecords) const override {
@@ -1434,14 +1467,14 @@ namespace {
        return TargetInfo::CharPtrBuiltinVaList;
      }
      bool setCPU(const std::string &Name) override {
-      bool Valid = llvm::StringSwitch<bool>(Name)
-        .Case("sm_20", true)
-        .Case("sm_21", true)
-        .Case("sm_30", true)
-        .Case("sm_35", true)
-        .Default(false);
-
-      return Valid;
+      GPU = llvm::StringSwitch<GPUKind>(Name)
+                .Case("sm_20", GK_SM20)
+                .Case("sm_21", GK_SM21)
+                .Case("sm_30", GK_SM30)
+                .Case("sm_35", GK_SM35)
+                .Default(GK_NONE);
+
+      return GPU != GK_NONE;
      }
    };
  
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp

index bdd0f42..65ffd54 100644 (file)
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1349,6 +1349,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
    if (Args.hasArg(OPT_fno_operator_names))
      Opts.CXXOperatorNames = 0;
  
+  if (Args.hasArg(OPT_fcuda_is_device))
+    Opts.CUDAIsDevice = 1;
+
    if (Opts.ObjC1) {
      if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
        StringRef value = arg->getValue();
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp

index f671a2f..3550ac2 100644 (file)
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -870,6 +870,13 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
      Builder.defineMacro("_OPENMP", "201307");
    }
  
+  // CUDA device path compilaton
+  if (LangOpts.CUDAIsDevice) {
+    // The CUDA_ARCH value is set for the GPU target specified in the NVPTX
+    // backend's target defines.
+    Builder.defineMacro("__CUDA_ARCH__");
+  }
+
    // Get other target #defines.
    TI.getTargetDefines(LangOpts, Builder);
  }
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp

index 6671520..46a8317 100644 (file)
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -14,6 +14,7 @@
  #include "clang/Sema/Sema.h"
  #include "clang/AST/ASTContext.h"
  #include "clang/AST/Decl.h"
+#include "clang/Lex/Preprocessor.h"
  #include "clang/Sema/SemaDiagnostic.h"
  #include "llvm/ADT/Optional.h"
  #include "llvm/ADT/SmallVector.h"
@@ -72,21 +73,29 @@ bool Sema::CheckCUDATarget(CUDAFunctionTarget CallerTarget,
    if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
      return true;
  
-  // CUDA B.1.1 "The __device__ qualifier declares a function that is...
+  // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
    // Callable from the device only."
    if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
      return true;
  
-  // CUDA B.1.2 "The __global__ qualifier declares a function that is...
+  // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
    // Callable from the host only."
-  // CUDA B.1.3 "The __host__ qualifier declares a function that is...
+  // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
    // Callable from the host only."
    if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
        (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
      return true;
  
-  if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice)
-    return true;
+  // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
+  // however, in which case the function is compiled for both the host and the
+  // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
+  // paths between host and device."
+  bool InDeviceMode = getLangOpts().CUDAIsDevice;
+  if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
+    if ((InDeviceMode && CalleeTarget != CFT_Device) ||
+        (!InDeviceMode && CalleeTarget != CFT_Host))
+      return true;
+  }
  
    return false;
  }
diff --git a/clang/test/SemaCUDA/function-target.cu b/clang/test/SemaCUDA/function-target.cu

index 51bc8c9..ca56030 100644 (file)
--- a/clang/test/SemaCUDA/function-target.cu
+++ b/clang/test/SemaCUDA/function-target.cu
@@ -1,4 +1,5 @@
  // RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
  
  #include "Inputs/cuda.h"
  
@@ -31,14 +32,40 @@ __device__ void d1(void) {
    d1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'd1g' in __device__ function}}
  }
  
-__host__ void hd1h(void); // expected-note {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
-__device__ void hd1d(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+// Expected 0-1 as in one of host/device side compilation it is an error, while
+// not in the other
+__host__ void hd1h(void); // expected-note 0-1 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
+__device__ void hd1d(void); // expected-note 0-1 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+__host__ void hd1hg(void);
+__device__ void hd1dg(void);
+#ifdef __CUDA_ARCH__
+__host__ void hd1hig(void); // expected-note {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
+#else
+__device__ void hd1dig(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+#endif
  __host__ __device__ void hd1hd(void);
  __global__ void hd1g(void); // expected-note {{'hd1g' declared here}}
  
  __host__ __device__ void hd1(void) {
-  hd1h(); // expected-error {{no matching function}}
-  hd1d(); // expected-error {{no matching function}}
+  // Expected 0-1 as in one of host/device side compilation it is an error,
+  // while not in the other
+  hd1d(); // expected-error 0-1 {{no matching function}}
+  hd1h(); // expected-error 0-1 {{no matching function}}
+
+  // No errors as guarded
+#ifdef __CUDA_ARCH__
+  hd1d();
+#else
+  hd1h();
+#endif
+
+  // Errors as incorrectly guarded
+#ifndef __CUDA_ARCH__
+  hd1dig(); // expected-error {{no matching function}}
+#else
+  hd1hig(); // expected-error {{no matching function}}
+#endif
+
    hd1hd();
    hd1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'hd1g' in __host__ __device__ function}}
  }
author	Reid Kleckner <reid@kleckner.net>
	Wed, 3 Dec 2014 21:53:36 +0000 (21:53 +0000)
committer	Reid Kleckner <reid@kleckner.net>
	Wed, 3 Dec 2014 21:53:36 +0000 (21:53 +0000)
clang/include/clang/Basic/LangOptions.def		patch \| blob \| history
clang/lib/Basic/Targets.cpp		patch \| blob \| history
clang/lib/Frontend/CompilerInvocation.cpp		patch \| blob \| history
clang/lib/Frontend/InitPreprocessor.cpp		patch \| blob \| history
clang/lib/Sema/SemaCUDA.cpp		patch \| blob \| history
clang/test/SemaCUDA/function-target.cu		patch \| blob \| history