LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
LANGOPT(CUDA , 1, 0, "CUDA")
LANGOPT(OpenMP , 1, 0, "OpenMP support")
+LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device")
LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")
class NVPTXTargetInfo : public TargetInfo {
static const char * const GCCRegNames[];
static const Builtin::Info BuiltinInfo[];
+
+ // The GPU profiles supported by the NVPTX backend
+ enum GPUKind {
+ GK_NONE,
+ GK_SM20,
+ GK_SM21,
+ GK_SM30,
+ GK_SM35,
+ } GPU;
+
public:
NVPTXTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
// Define available target features
// These must be defined in sorted order!
NoAsmVariants = true;
+ // Set the default GPU to sm_20.
+ GPU = GK_SM20;
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
Builder.defineMacro("__PTX__");
Builder.defineMacro("__NVPTX__");
+ if (Opts.CUDAIsDevice) {
+ // Set __CUDA_ARCH__ for the GPU specified.
+ std::string CUDAArchCode;
+ switch (GPU) {
+ case GK_SM20:
+ CUDAArchCode = "200";
+ break;
+ case GK_SM21:
+ CUDAArchCode = "210";
+ break;
+ case GK_SM30:
+ CUDAArchCode = "300";
+ break;
+ case GK_SM35:
+ CUDAArchCode = "350";
+ break;
+ default:
+ llvm_unreachable("Unhandled target CPU");
+ }
+ Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
+ }
}
void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const override {
Records = BuiltinInfo;
NumRecords = clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;
}
BuiltinVaListKind getBuiltinVaListKind() const override {
return TargetInfo::CharPtrBuiltinVaList;
}
bool setCPU(const std::string &Name) override {
- bool Valid = llvm::StringSwitch<bool>(Name)
- .Case("sm_20", true)
- .Case("sm_21", true)
- .Case("sm_30", true)
- .Case("sm_35", true)
- .Default(false);
-
- return Valid;
+ GPU = llvm::StringSwitch<GPUKind>(Name)
+ .Case("sm_20", GK_SM20)
+ .Case("sm_21", GK_SM21)
+ .Case("sm_30", GK_SM30)
+ .Case("sm_35", GK_SM35)
+ .Default(GK_NONE);
+
+ return GPU != GK_NONE;
}
};
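For context, a minimal CUDA sketch of how source code can branch on the numeric __CUDA_ARCH__ value that the NVPTX hook above derives from the target CPU. This is illustration only, not part of the patch; the helper name and thresholds are invented.

// __CUDA_ARCH__ is only defined during the device pass, and its value follows
// the target CPU (200 for sm_20, 210 for sm_21, 300 for sm_30, 350 for sm_35).
__host__ __device__ static inline int threadsPerBlock() {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
  return 256;  // device code generated for sm_35
#elif defined(__CUDA_ARCH__)
  return 128;  // device code for sm_20/sm_21/sm_30
#else
  return 128;  // host pass: __CUDA_ARCH__ is not defined
#endif
}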
if (Args.hasArg(OPT_fno_operator_names))
Opts.CXXOperatorNames = 0;
+ if (Args.hasArg(OPT_fcuda_is_device))
+ Opts.CUDAIsDevice = 1;
+
if (Opts.ObjC1) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
StringRef value = arg->getValue();
Builder.defineMacro("_OPENMP", "201307");
}
+ // CUDA device path compilation
+ if (LangOpts.CUDAIsDevice) {
+ // The __CUDA_ARCH__ value for the GPU being targeted is set in the NVPTX
+ // backend's target defines.
+ Builder.defineMacro("__CUDA_ARCH__");
+ }
+
// Get other target #defines.
TI.getTargetDefines(LangOpts, Builder);
}
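As a rough sketch of what this enables (illustration only; the function name is invented): the same translation unit is compiled twice, once for the host and once with -fcuda-is-device, so the mere presence of __CUDA_ARCH__ can select between host-only and device-only definitions.

#ifdef __CUDA_ARCH__
// Device pass: __CUDA_ARCH__ is predefined because CUDAIsDevice is set.
__device__ void log_event(int code) { /* device-side implementation */ }
#else
// Host pass: __CUDA_ARCH__ is not defined at all.
void log_event(int code) { /* host-side implementation */ }
#endif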
#include "clang/Sema/Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
return true;
- // CUDA B.1.1 "The __device__ qualifier declares a function that is...
+ // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
// Callable from the device only."
if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
return true;
- // CUDA B.1.2 "The __global__ qualifier declares a function that is...
+ // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
// Callable from the host only."
- // CUDA B.1.3 "The __host__ qualifier declares a function that is...
+ // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
// Callable from the host only."
if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
(CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
return true;
- if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice)
- return true;
+ // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
+ // however, in which case the function is compiled for both the host and the
+ // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
+ // paths between host and device."
+ bool InDeviceMode = getLangOpts().CUDAIsDevice;
+ if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
+ if ((InDeviceMode && CalleeTarget != CFT_Device) ||
+ (!InDeviceMode && CalleeTarget != CFT_Host))
+ return true;
+ }
return false;
}
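To illustrate the relaxed rule before the tests below (a sketch only; the function names are invented): a __host__ __device__ function may now call a target-specific function as long as the call is only seen by the matching compilation pass, which is exactly what a __CUDA_ARCH__ guard provides.

__device__ int deviceOnly();
__host__ int hostOnly();

__host__ __device__ int dispatch() {
#ifdef __CUDA_ARCH__
  return deviceOnly();  // only the device pass sees this call
#else
  return hostOnly();    // only the host pass sees this call
#endif
}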
// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
#include "Inputs/cuda.h"
d1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'd1g' in __device__ function}}
}
-__host__ void hd1h(void); // expected-note {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
-__device__ void hd1d(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+// Expected 0-1 times: the call is an error in one of the host/device
+// compilation passes but not in the other.
+__host__ void hd1h(void); // expected-note 0-1 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
+__device__ void hd1d(void); // expected-note 0-1 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+__host__ void hd1hg(void);
+__device__ void hd1dg(void);
+#ifdef __CUDA_ARCH__
+__host__ void hd1hig(void); // expected-note {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
+#else
+__device__ void hd1dig(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+#endif
__host__ __device__ void hd1hd(void);
__global__ void hd1g(void); // expected-note {{'hd1g' declared here}}
__host__ __device__ void hd1(void) {
- hd1h(); // expected-error {{no matching function}}
- hd1d(); // expected-error {{no matching function}}
+ // Expected 0-1 times: the call is an error in one of the host/device
+ // compilation passes but not in the other.
+ hd1d(); // expected-error 0-1 {{no matching function}}
+ hd1h(); // expected-error 0-1 {{no matching function}}
+
+ // No errors: these calls are correctly guarded on __CUDA_ARCH__.
+#ifdef __CUDA_ARCH__
+ hd1dg();
+#else
+ hd1hg();
+#endif
+
+ // Errors: these calls are guarded on the wrong __CUDA_ARCH__ branch.
+#ifndef __CUDA_ARCH__
+ hd1dig(); // expected-error {{no matching function}}
+#else
+ hd1hig(); // expected-error {{no matching function}}
+#endif
+
hd1hd();
hd1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'hd1g' in __host__ __device__ function}}
}