[libc] Add special handling for CUDA PTX features

author Joseph Huber <jhuber6@vols.utk.edu>

Mon, 17 Apr 2023 14:11:09 +0000 (09:11 -0500)

committer Joseph Huber <jhuber6@vols.utk.edu>

Mon, 17 Apr 2023 16:51:34 +0000 (11:51 -0500)
author Joseph Huber <jhuber6@vols.utk.edu>
Mon, 17 Apr 2023 14:11:09 +0000 (09:11 -0500)
committer Joseph Huber <jhuber6@vols.utk.edu>
Mon, 17 Apr 2023 16:51:34 +0000 (11:51 -0500)
diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake

index c372969..5a9fdc6 100644 (file)
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -64,6 +64,50 @@ function(_get_common_compile_options output_var flags)
    set(${output_var} ${compile_options} PARENT_SCOPE)
  endfunction()
  
+# Obtains NVPTX specific arguments for compilation.
+#
+# The PTX feature is primarily based on the CUDA toolchain version. We want to
+# be able to target NVPTX without an existing architecture, so we need to set
+# this manually. This simply sets the PTX feature to the minimum required for
+# the features we wish to use on that target. Adjust as needed.
+#
+# Arguments:
+#   output_var - name of the variable, set in the caller's scope, that
+#                receives the list of compiler options.
+#   gpu_arch   - NVPTX architecture name such as "sm_70"; an unrecognized
+#                value stops configuration with a FATAL_ERROR.
+#
+# The result always holds "-march=<gpu_arch>" and one "--cuda-feature" flag;
+# "--cuda-path" is appended only when if(LIBC_CUDA_ROOT) evaluates true.
+#
+# Example:
+#   get_nvptx_compile_options(nvptx_options "sm_70")
+function(get_nvptx_compile_options output_var gpu_arch)
+  # Start from a fresh list: a function scope inherits the caller's variables,
+  # and the visible call sites also name their result `nvptx_options`, so a
+  # bare list(APPEND) would pile new flags on top of an earlier call's result.
+  set(nvptx_options "-march=${gpu_arch}")
+  if("${gpu_arch}" STREQUAL "sm_35")
+    list(APPEND nvptx_options "--cuda-feature=+ptx42")
+  elseif("${gpu_arch}" MATCHES "^sm_(37|50|52|53)$")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" MATCHES "^sm_(60|61|62)$")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" MATCHES "^sm_(70|72|75)$")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" MATCHES "^sm_(80|86)$")
+    list(APPEND nvptx_options "--cuda-feature=+ptx72")
+  else()
+    message(FATAL_ERROR "Unknown Nvidia GPU architecture '${gpu_arch}'")
+  endif()
+
+  # Only forward a CUDA installation path when one has been configured.
+  if(LIBC_CUDA_ROOT)
+    list(APPEND nvptx_options "--cuda-path=${LIBC_CUDA_ROOT}")
+  endif()
+  set(${output_var} ${nvptx_options} PARENT_SCOPE)
+endfunction()
+
  # Builds the object target for the GPU.
  # This compiles the target for all supported architectures and embeds it into
  # host binary for installing. The internal target contains the GPU code directly
@@ -103,7 +147,8 @@ function(_build_gpu_objects fq_target_name internal_target_name)
          list(APPEND compile_options "-mcpu=${gpu_arch}")
        elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)
          set(gpu_target_triple "nvptx64-nvidia-cuda")
-        list(APPEND compile_options "-march=${gpu_arch}")
+        get_nvptx_compile_options(nvptx_options ${gpu_arch})
+        list(APPEND compile_options "${nvptx_options}")
        else()
          message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'")
        endif()
@@ -200,9 +245,8 @@ function(_build_gpu_objects fq_target_name internal_target_name)
      if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
        target_compile_options(${internal_target_name} PRIVATE -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto)
      elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
-      target_compile_options(${internal_target_name} PRIVATE
-                             -march=${LIBC_GPU_TARGET_ARCHITECTURE}
-                             --cuda-path=${LIBC_CUDA_ROOT})
+      get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
+      target_compile_options(${internal_target_name} PRIVATE ${nvptx_options})
      endif()
      target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
      if(full_deps_list)
diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake

index 592b632..75bb004 100644 (file)
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -506,9 +506,9 @@ function(add_integration_test test_name)
                             -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
                             --target=${LIBC_GPU_TARGET_TRIPLE})
    elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
+    get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
      target_compile_options(${fq_build_target_name} PRIVATE
-                           --cuda-path=${LIBC_CUDA_ROOT}
-                           -march=${LIBC_GPU_TARGET_ARCHITECTURE}
+                           ${nvptx_options}
                             --target=${LIBC_GPU_TARGET_TRIPLE})
    endif()
  
diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt

index f8839e9..4966169 100644 (file)
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -1,3 +1,4 @@
+get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
  add_startup_object(
    crt1
    SRC
@@ -8,9 +9,8 @@ add_startup_object(
      -ffreestanding # To avoid compiler warnings about calling the main function.
      -fno-builtin
      -nogpulib # Do not include any GPU vendor libraries.
-    -march=${LIBC_GPU_TARGET_ARCHITECTURE}
      --target=${LIBC_GPU_TARGET_TRIPLE}
-    --cuda-path=${LIBC_CUDA_ROOT}
+    ${nvptx_options}
    NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
  )
  get_fq_target_name(crt1 fq_name)
author	Joseph Huber <jhuber6@vols.utk.edu>
	Mon, 17 Apr 2023 14:11:09 +0000 (09:11 -0500)
committer	Joseph Huber <jhuber6@vols.utk.edu>
	Mon, 17 Apr 2023 16:51:34 +0000 (11:51 -0500)
libc/cmake/modules/LLVMLibCObjectRules.cmake		patch \| blob \| history
libc/cmake/modules/LLVMLibCTestRules.cmake		patch \| blob \| history
libc/startup/gpu/nvptx/CMakeLists.txt		patch \| blob \| history