From afc9d674fe5a14b95c50a38d8605a159c2460427 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 3 May 2022 14:13:56 -0400 Subject: [PATCH] [CUDA][HIP] support __noinline__ as keyword CUDA/HIP programs use __noinline__ like a keyword e.g. __noinline__ void foo() {} since __noinline__ is defined as a macro __attribute__((noinline)) in CUDA/HIP runtime header files. However, gcc and clang supports __attribute__((__noinline__)) the same as __attribute__((noinline)). Some C++ libraries use __attribute__((__noinline__)) in their header files. When CUDA/HIP programs include such header files, clang will emit error about invalid attributes. This patch fixes this issue by supporting __noinline__ as a keyword, so that CUDA/HIP runtime could remove the macro definition. Reviewed by: Aaron Ballman, Artem Belevich Differential Revision: https://reviews.llvm.org/D124866 --- clang/docs/ReleaseNotes.rst | 7 +++++-- clang/include/clang/Basic/Attr.td | 5 +++-- clang/include/clang/Basic/AttrDocs.td | 4 ++++ clang/include/clang/Basic/Features.def | 3 +++ clang/include/clang/Basic/TokenKinds.def | 3 +++ clang/include/clang/Parse/Parser.h | 1 + clang/lib/Basic/IdentifierTable.cpp | 3 +++ clang/lib/Parse/ParseDecl.cpp | 14 +++++++++++++ clang/test/CodeGenCUDA/noinline.cu | 34 ++++++++++++++++++++++++++++++++ clang/test/Lexer/has_feature.cu | 8 ++++++++ clang/test/SemaCUDA/noinline.cu | 19 ++++++++++++++++++ 11 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGenCUDA/noinline.cu create mode 100644 clang/test/Lexer/has_feature.cu create mode 100644 clang/test/SemaCUDA/noinline.cu diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c3605ab..ff7b428 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -351,8 +351,11 @@ C++2b Feature Support - Implemented `P0849R8: auto(x): decay-copy in the language `_. - Implemented `P2242R3: Non-literal variables (and labels and gotos) in constexpr functions `_. -CUDA Language Changes in Clang ------------------------------- +CUDA/HIP Language Changes in Clang +---------------------------------- + +- Added `__noinline__` as a keyword to avoid diagnostics due to usage of + `__attribute__((__noinline__))` in CUDA/HIP programs. Objective-C Language Changes in Clang ------------------------------------- diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index eb896fe..39359f4 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1775,8 +1775,9 @@ def Convergent : InheritableAttr { } def NoInline : DeclOrStmtAttr { - let Spellings = [GCC<"noinline">, CXX11<"clang", "noinline">, - C2x<"clang", "noinline">, Declspec<"noinline">]; + let Spellings = [Keyword<"__noinline__">, GCC<"noinline">, + CXX11<"clang", "noinline">, C2x<"clang", "noinline">, + Declspec<"noinline">]; let Accessors = [Accessor<"isClangNoInline", [CXX11<"clang", "noinline">, C2x<"clang", "noinline">]>]; let Documentation = [NoInlineDocs]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 02b6031..c7ef52f 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -538,6 +538,10 @@ spellings of the attribute are not supported on statements. If a statement is marked ``[[clang::noinline]]`` and contains calls, those calls inside the statement will not be inlined by the compiler. +``__noinline__`` can be used as a keyword in CUDA/HIP languages. This is to +avoid diagnostics due to usage of ``__attribute__((__noinline__))`` +with ``__noinline__`` defined as a macro as ``__attribute__((noinline))``. + .. code-block:: c int example(void) { diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def index fbaa617..c3f3fe7 100644 --- a/clang/include/clang/Basic/Features.def +++ b/clang/include/clang/Basic/Features.def @@ -270,5 +270,8 @@ EXTENSION(cxx_attributes_on_using_declarations, LangOpts.CPlusPlus11) FEATURE(cxx_abi_relative_vtable, LangOpts.CPlusPlus && LangOpts.RelativeCXXABIVTables) +// CUDA/HIP Features +FEATURE(cuda_noinline_keyword, LangOpts.CUDA) + #undef EXTENSION #undef FEATURE diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 0933896..7b65a15 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -599,6 +599,9 @@ KEYWORD(pipe , KEYOPENCLC | KEYOPENCLCXX) // C++ for OpenCL s2.3.1: addrspace_cast operator KEYWORD(addrspace_cast , KEYOPENCLCXX) +// CUDA/HIP function attributes +KEYWORD(__noinline__ , KEYCUDA) + // OpenMP Type Traits UNARY_EXPR_OR_TYPE_TRAIT(__builtin_omp_required_simd_align, OpenMPRequiredSimdAlign, KEYALL) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 99fe375..caa58d9 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2827,6 +2827,7 @@ private: void ParseOpenCLKernelAttributes(ParsedAttributes &attrs); void ParseOpenCLQualifiers(ParsedAttributes &Attrs); void ParseNullabilityTypeSpecifiers(ParsedAttributes &attrs); + void ParseCUDAFunctionAttributes(ParsedAttributes &attrs); VersionTuple ParseVersionTuple(SourceRange &Range); void ParseAvailabilityAttribute(IdentifierInfo &Availability, diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index b86cb7a..af19de4 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -108,6 +108,7 @@ namespace { KEYOPENCLCXX = 0x400000, KEYMSCOMPAT = 0x800000, KEYSYCL = 0x1000000, + KEYCUDA = 0x2000000, KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20, KEYALL = (0x1ffffff & ~KEYNOMS18 & ~KEYNOOPENCL) // KEYNOMS18 and KEYNOOPENCL are used to exclude. @@ -158,6 +159,8 @@ static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, return KS_Future; if (LangOpts.isSYCL() && (Flags & KEYSYCL)) return KS_Enabled; + if (LangOpts.CUDA && (Flags & KEYCUDA)) + return KS_Enabled; return KS_Disabled; } diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 8d6e84b..89e13cf 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -897,6 +897,15 @@ void Parser::ParseOpenCLKernelAttributes(ParsedAttributes &attrs) { } } +void Parser::ParseCUDAFunctionAttributes(ParsedAttributes &attrs) { + while (Tok.is(tok::kw___noinline__)) { + IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + SourceLocation AttrNameLoc = ConsumeToken(); + attrs.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0, + ParsedAttr::AS_Keyword); + } +} + void Parser::ParseOpenCLQualifiers(ParsedAttributes &Attrs) { IdentifierInfo *AttrName = Tok.getIdentifierInfo(); SourceLocation AttrNameLoc = Tok.getLocation(); @@ -3690,6 +3699,11 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, ParseOpenCLKernelAttributes(DS.getAttributes()); continue; + // CUDA/HIP single token adornments. + case tok::kw___noinline__: + ParseCUDAFunctionAttributes(DS.getAttributes()); + continue; + // Nullability type specifiers. case tok::kw__Nonnull: case tok::kw__Nullable: diff --git a/clang/test/CodeGenCUDA/noinline.cu b/clang/test/CodeGenCUDA/noinline.cu new file mode 100644 index 0000000..41e8231 --- /dev/null +++ b/clang/test/CodeGenCUDA/noinline.cu @@ -0,0 +1,34 @@ +// Uses -O2 since the defalt -O0 option adds noinline to all functions. + +// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -fcuda-is-device \ +// RUN: -O2 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -O2 -disable-llvm-passes -emit-llvm -o - -x hip %s | FileCheck %s + +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \ +// RUN: -O2 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +#include "Inputs/cuda.h" + +__noinline__ __device__ __host__ void fun1() {} + +__attribute__((noinline)) __device__ __host__ void fun2() {} + +__attribute__((__noinline__)) __device__ __host__ void fun3() {} + +[[gnu::__noinline__]] __device__ __host__ void fun4() {} + +#define __noinline__ __attribute__((__noinline__)) +__noinline__ __device__ __host__ void fun5() {} + +__device__ __host__ void fun6() {} + +// CHECK: define{{.*}}@_Z4fun1v{{.*}}#[[ATTR1:[0-9]*]] +// CHECK: define{{.*}}@_Z4fun2v{{.*}}#[[ATTR1:[0-9]*]] +// CHECK: define{{.*}}@_Z4fun3v{{.*}}#[[ATTR1:[0-9]*]] +// CHECK: define{{.*}}@_Z4fun4v{{.*}}#[[ATTR1:[0-9]*]] +// CHECK: define{{.*}}@_Z4fun5v{{.*}}#[[ATTR1:[0-9]*]] +// CHECK: define{{.*}}@_Z4fun6v{{.*}}#[[ATTR2:[0-9]*]] +// CHECK: attributes #[[ATTR1]] = {{.*}}noinline +// CHECK-NOT: attributes #[[ATTR2]] = {{.*}}noinline diff --git a/clang/test/Lexer/has_feature.cu b/clang/test/Lexer/has_feature.cu new file mode 100644 index 0000000..5cb6535 --- /dev/null +++ b/clang/test/Lexer/has_feature.cu @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -E -triple x86_64-linux-gnu %s -o - | FileCheck %s + +// CHECK: has_noinline_keyword +#if __has_feature(cuda_noinline_keyword) +int has_noinline_keyword(); +#else +int no_noinine_keyword(); +#endif diff --git a/clang/test/SemaCUDA/noinline.cu b/clang/test/SemaCUDA/noinline.cu new file mode 100644 index 0000000..bd96343 --- /dev/null +++ b/clang/test/SemaCUDA/noinline.cu @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -fsyntax-only -verify=cuda %s +// RUN: %clang_cc1 -fsyntax-only -verify=cuda -pedantic %s +// RUN: %clang_cc1 -fsyntax-only -verify=cpp -x c++ %s + +// cuda-no-diagnostics + +__noinline__ void fun1() { } // cpp-error {{unknown type name '__noinline__'}} + +__attribute__((noinline)) void fun2() { } +__attribute__((__noinline__)) void fun3() { } +[[gnu::__noinline__]] void fun4() { } + +#define __noinline__ __attribute__((__noinline__)) +__noinline__ void fun5() {} + +#undef __noinline__ +#10 "cuda.h" 3 +#define __noinline__ __attribute__((__noinline__)) +__noinline__ void fun6() {} -- 2.7.4