[libc] Add special handling for CUDA PTX features
author Joseph Huber <jhuber6@vols.utk.edu>
Mon, 17 Apr 2023 14:11:09 +0000 (09:11 -0500)
committer Joseph Huber <jhuber6@vols.utk.edu>
Mon, 17 Apr 2023 16:51:34 +0000 (11:51 -0500)
The NVIDIA compilation path requires some special options. This is
mostly because compilation is dependent on having a valid CUDA
toolchain. We don't actually need the CUDA toolchain to create the
exported `libcgpu.a` library because it's pure LLVM-IR. However, for
some language features we need the PTX version to be set. This is
normally set by checking the CUDA version, but without one installed it
will fail to build. We instead choose a minimum set of features on the
desired target, inferred from
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes
and the PTX reference for functions like `nanosleep`.

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D148532

libc/cmake/modules/LLVMLibCObjectRules.cmake
libc/cmake/modules/LLVMLibCTestRules.cmake
libc/startup/gpu/nvptx/CMakeLists.txt

index c372969..5a9fdc6 100644 (file)
@@ -64,6 +64,50 @@ function(_get_common_compile_options output_var flags)
   set(${output_var} ${compile_options} PARENT_SCOPE)
 endfunction()
 
+# Computes the NVPTX-specific compile options for `gpu_arch` (e.g. "sm_70") and
+# stores them as a list in the caller's variable named by `output_var`.
+# The PTX feature is normally derived from the installed CUDA toolchain; to
+# build without one, the minimum PTX version needed on each target is set here
+# by hand (adjust as needed). Unknown architectures are a fatal error, and
+# `--cuda-path` is added only when LIBC_CUDA_ROOT is set.
+function(get_nvptx_compile_options output_var gpu_arch)
+  set(nvptx_options "-march=${gpu_arch}") # Fresh list; ignore any caller value.
+  if("${gpu_arch}" STREQUAL "sm_35")
+    list(APPEND nvptx_options "--cuda-feature=+ptx42")
+  elseif("${gpu_arch}" STREQUAL "sm_37")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_50")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_52")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_53")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_60")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" STREQUAL "sm_61")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" STREQUAL "sm_62")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" STREQUAL "sm_70")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" STREQUAL "sm_72")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" STREQUAL "sm_75")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" STREQUAL "sm_80")
+    list(APPEND nvptx_options "--cuda-feature=+ptx72")
+  elseif("${gpu_arch}" STREQUAL "sm_86")
+    list(APPEND nvptx_options "--cuda-feature=+ptx72")
+  else()
+    message(FATAL_ERROR "Unknown Nvidia GPU architecture '${gpu_arch}'")
+  endif()
+  # Pass an explicit CUDA installation path only when one was configured.
+  if(LIBC_CUDA_ROOT)
+    list(APPEND nvptx_options "--cuda-path=${LIBC_CUDA_ROOT}")
+  endif()
+  set(${output_var} ${nvptx_options} PARENT_SCOPE)
+endfunction()
+
 # Builds the object target for the GPU.
 # This compiles the target for all supported architectures and embeds it into
 # host binary for installing. The internal target contains the GPU code directly
@@ -103,7 +147,8 @@ function(_build_gpu_objects fq_target_name internal_target_name)
         list(APPEND compile_options "-mcpu=${gpu_arch}")
       elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)
         set(gpu_target_triple "nvptx64-nvidia-cuda")
-        list(APPEND compile_options "-march=${gpu_arch}")
+        get_nvptx_compile_options(nvptx_options ${gpu_arch})
+        list(APPEND compile_options "${nvptx_options}")
       else()
         message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'")
       endif()
@@ -200,9 +245,8 @@ function(_build_gpu_objects fq_target_name internal_target_name)
     if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
       target_compile_options(${internal_target_name} PRIVATE -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto)
     elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
-      target_compile_options(${internal_target_name} PRIVATE
-                             -march=${LIBC_GPU_TARGET_ARCHITECTURE}
-                             --cuda-path=${LIBC_CUDA_ROOT})
+      get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
+      target_compile_options(${internal_target_name} PRIVATE ${nvptx_options})
     endif()
     target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
     if(full_deps_list)
index 592b632..75bb004 100644 (file)
@@ -506,9 +506,9 @@ function(add_integration_test test_name)
                            -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
                            --target=${LIBC_GPU_TARGET_TRIPLE})
   elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
+    get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
     target_compile_options(${fq_build_target_name} PRIVATE
-                           --cuda-path=${LIBC_CUDA_ROOT}
-                           -march=${LIBC_GPU_TARGET_ARCHITECTURE}
+                           ${nvptx_options}
                            --target=${LIBC_GPU_TARGET_TRIPLE})
   endif()
 
index f8839e9..4966169 100644 (file)
@@ -1,3 +1,4 @@
+get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
 add_startup_object(
   crt1
   SRC
@@ -8,9 +9,8 @@ add_startup_object(
     -ffreestanding # To avoid compiler warnings about calling the main function.
     -fno-builtin
     -nogpulib # Do not include any GPU vendor libraries.
-    -march=${LIBC_GPU_TARGET_ARCHITECTURE}
     --target=${LIBC_GPU_TARGET_TRIPLE}
-    --cuda-path=${LIBC_CUDA_ROOT}
+    ${nvptx_options}
   NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
 )
 get_fq_target_name(crt1 fq_name)