From d51d2b5909036cd13fcca7d5b5d1bed48dff3053 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 16 Feb 2023 14:46:39 -0600 Subject: [PATCH] [libc] Support add_object_library for the GPU build This patch unifies the handling of generating the GPU build targets between the `add_entrypoint_library` and the `add_object_library` functions. The `_build_gpu_objects` function will create two targets. One contains a single object file with several GPU binaries embedded in it, a so-called fatbinary. The other is a direct compile of the supported target to be used internally only. This patch pulls out some of the properties logic so that we can handle both more easily. This patch also required adding an override `NO_GPU_BUNDLE` for cases when we only want to build the source file as normal. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D144214 --- libc/cmake/modules/LLVMLibCObjectRules.cmake | 154 +++++++++++++++------------ libc/startup/gpu/amdgpu/CMakeLists.txt | 1 + libc/startup/gpu/nvptx/CMakeLists.txt | 1 + 3 files changed, 90 insertions(+), 66 deletions(-) diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake index dc5b6f2..bdd600d 100644 --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -59,19 +59,23 @@ function(_get_common_compile_options output_var flags) set(${output_var} ${compile_options} PARENT_SCOPE) endfunction() -# Builds the entrypoint target for the GPU. +# Builds the object target for the GPU. +# This compiles the target for all supported architectures and embeds it into +# host binary for installing. The internal target contains the GPU code directly +# compiled for a single architecture used internally. 
# Usage: -# _build_gpu_entrypoint_objects( +# _build_gpu_objects( # +# # SRCS # HDRS # DEPENDS # COMPILE_OPTIONS # FLAGS # ) -function(_build_gpu_entrypoint_objects fq_target_name) +function(_build_gpu_objects fq_target_name internal_target_name) cmake_parse_arguments( - "ADD_GPU_ENTRYPOINT_OBJ" + "ADD_GPU_OBJ" "" # No optional arguments "NAME;CXX_STANDARD" # Single value arguments "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;FLAGS" # Multi value arguments @@ -82,7 +86,7 @@ function(_build_gpu_entrypoint_objects fq_target_name) # this so we can support multiple accelerators on the same machine. foreach(gpu_arch ${all_gpu_architectures}) set(gpu_target_name ${fq_target_name}.${gpu_arch}) - set(compile_options ${ADD_GPU_ENTRYPOINT_OBJ_COMPILE_OPTIONS}) + set(compile_options ${ADD_GPU_OBJ_COMPILE_OPTIONS}) # Derive the triple from the specified architecture. if("${gpu_arch}" IN_LIST all_amdgpu_architectures) set(gpu_target_triple "amdgcn-amd-amdhsa") @@ -101,14 +105,16 @@ function(_build_gpu_entrypoint_objects fq_target_name) add_library(${gpu_target_name} EXCLUDE_FROM_ALL OBJECT - ${ADD_GPU_ENTRYPOINT_OBJ_SRCS} - ${ADD_GPU_ENTRYPOINT_OBJ_HDRS} + ${ADD_GPU_OBJ_SRCS} + ${ADD_GPU_OBJ_HDRS} ) target_compile_options(${gpu_target_name} PRIVATE ${compile_options}) target_include_directories(${gpu_target_name} PRIVATE ${include_dirs}) - add_dependencies(${gpu_target_name} ${ADD_GPU_ENTRYPOINT_OBJ_DEPENDS}) target_compile_definitions(${gpu_target_name} PRIVATE LIBC_COPT_PUBLIC_PACKAGING) + if(ADD_GPU_OBJ_DEPENDS) + add_dependencies(${gpu_target_name} ${ADD_GPU_OBJ_DEPENDS}) + endif() # Append this target to a list of images to package into a single binary. set(input_file $) @@ -135,7 +141,7 @@ function(_build_gpu_entrypoint_objects fq_target_name) # TODO: In the future we will want to combine every architecture for a target # into a single bitcode file and use that. For now we simply build for # every single one and let the offloading linker handle it. 
- get_filename_component(stub_filename ${ADD_GPU_ENTRYPOINT_OBJ_SRCS} NAME) + get_filename_component(stub_filename ${ADD_GPU_OBJ_SRCS} NAME) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stubs) file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename} "// Empty file.\n") add_library( @@ -151,19 +157,7 @@ function(_build_gpu_entrypoint_objects fq_target_name) target_include_directories(${fq_target_name} PRIVATE ${include_dirs}) add_dependencies(${fq_target_name} ${full_deps_list} ${packaged_target_name}) - set_target_properties( - ${fq_target_name} - PROPERTIES - ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME} - TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE} - OBJECT_FILE "$" - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - DEPS "${fq_deps_list}" - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) - # We only build the internal target for a single supported architecture. - set(internal_target_name ${fq_target_name}.__internal__) set(include_dirs ${LIBC_BUILD_DIR}/include ${LIBC_SOURCE_DIR} ${LIBC_BUILD_DIR}) if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU OR LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) @@ -171,8 +165,8 @@ function(_build_gpu_entrypoint_objects fq_target_name) ${internal_target_name} EXCLUDE_FROM_ALL OBJECT - ${ADD_ENTRYPOINT_OBJ_SRCS} - ${ADD_ENTRYPOINT_OBJ_HDRS} + ${ADD_GPU_OBJ_SRCS} + ${ADD_GPU_OBJ_HDRS} ) target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options} --target=${LIBC_GPU_TARGET_TRIPLE}) @@ -182,17 +176,9 @@ function(_build_gpu_entrypoint_objects fq_target_name) target_compile_options(${internal_target_name} PRIVATE -march=${LIBC_GPU_TARGET_ARCHITECTURE}) endif() target_include_directories(${internal_target_name} PRIVATE ${include_dirs}) - add_dependencies(${internal_target_name} ${full_deps_list}) - set_target_properties( - ${internal_target_name} - PROPERTIES - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) - set_target_properties( - ${fq_target_name} - PROPERTIES OBJECT_FILE_RAW "$" - 
) + if(full_deps_list) + add_dependencies(${internal_target_name} ${full_deps_list}) + endif() endif() endfunction() @@ -209,7 +195,7 @@ endfunction() function(create_object_library fq_target_name) cmake_parse_arguments( "ADD_OBJECT" - "" # No optional arguments + "NO_GPU_BUNDLE" # No optional arguments "CXX_STANDARD" # Single value arguments "SRCS;HDRS;COMPILE_OPTIONS;DEPENDS;FLAGS" # Multivalue arguments ${ARGN} @@ -219,28 +205,49 @@ function(create_object_library fq_target_name) message(FATAL_ERROR "'add_object_library' rule requires SRCS to be specified.") endif() - add_library( - ${fq_target_name} - EXCLUDE_FROM_ALL - OBJECT - ${ADD_OBJECT_SRCS} - ${ADD_OBJECT_HDRS} - ) - target_include_directories( - ${fq_target_name} - PRIVATE - ${LIBC_BUILD_DIR}/include - ${LIBC_SOURCE_DIR} - ${LIBC_BUILD_DIR} - ) + # The GPU build uses a separate internal file. + if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE}) + set(internal_target_name ${fq_target_name}.__internal__) + else() + set(internal_target_name ${fq_target_name}) + endif() + + get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS}) _get_common_compile_options( compile_options "${ADD_OBJECT_FLAGS}" ${ADD_OBJECT_COMPILE_OPTIONS} ) - target_compile_options(${fq_target_name} PRIVATE ${compile_options}) - get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS}) + # GPU builds require special handling for the objects because we want to + # export several different targets at once, e.g. for both Nvidia and AMD. 
+ if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE}) + _build_gpu_objects( + ${fq_target_name} + ${internal_target_name} + SRCS ${ADD_OBJECT_SRCS} + HDRS ${ADD_OBJECT_HDRS} + DEPENDS ${fq_deps_list} + COMPILE_OPTIONS ${common_compile_options} + FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" + ) + else() + add_library( + ${fq_target_name} + EXCLUDE_FROM_ALL + OBJECT + ${ADD_OBJECT_SRCS} + ${ADD_OBJECT_HDRS} + ) + target_include_directories( + ${fq_target_name} + PRIVATE + ${LIBC_BUILD_DIR}/include + ${LIBC_SOURCE_DIR} + ${LIBC_BUILD_DIR} + ) + target_compile_options(${fq_target_name} PRIVATE ${compile_options}) + endif() if(SHOW_INTERMEDIATE_OBJECTS) message(STATUS "Adding object library ${fq_target_name}") @@ -262,11 +269,18 @@ function(create_object_library fq_target_name) ${fq_target_name} PROPERTIES TARGET_TYPE ${OBJECT_LIBRARY_TARGET_TYPE} - OBJECT_FILES "$" CXX_STANDARD ${ADD_OBJECT_CXX_STANDARD} DEPS "${fq_deps_list}" FLAGS "${ADD_OBJECT_FLAGS}" ) + + if(TARGET ${internal_target_name}) + set_target_properties( + ${fq_target_name} + PROPERTIES + OBJECT_FILES "$" + ) + endif() endfunction(create_object_library) # Internal function, used by `add_object_library`. @@ -483,13 +497,13 @@ function(create_entrypoint_object fq_target_name) # GPU builds require special handling for the objects because we want to # export several different targets at once, e.g. for both Nvidia and AMD. 
if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - _build_gpu_entrypoint_objects( + _build_gpu_objects( ${fq_target_name} + ${internal_target_name} SRCS ${ADD_ENTRYPOINT_OBJ_SRCS} HDRS ${ADD_ENTRYPOINT_OBJ_HDRS} COMPILE_OPTIONS ${common_compile_options} DEPENDS ${full_deps_list} - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" ) else() @@ -505,12 +519,6 @@ function(create_entrypoint_object fq_target_name) target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options}) target_include_directories(${internal_target_name} PRIVATE ${include_dirs}) add_dependencies(${internal_target_name} ${full_deps_list}) - set_target_properties( - ${internal_target_name} - PROPERTIES - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) add_library( ${fq_target_name} @@ -524,22 +532,36 @@ function(create_entrypoint_object fq_target_name) target_compile_options(${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLIBC_COPT_PUBLIC_PACKAGING) target_include_directories(${fq_target_name} PRIVATE ${include_dirs}) add_dependencies(${fq_target_name} ${full_deps_list}) + endif() + + set_target_properties( + ${fq_target_name} + PROPERTIES + ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME} + TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE} + OBJECT_FILE "$" + CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} + DEPS "${fq_deps_list}" + FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" + ) + if(TARGET ${internal_target_name}) + set_target_properties( + ${internal_target_name} + PROPERTIES + CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} + FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" + ) set_target_properties( ${fq_target_name} PROPERTIES - ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME} - TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE} - OBJECT_FILE "$" # TODO: We don't need to list internal object files if the internal # target is a normal static library. 
OBJECT_FILE_RAW "$" - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - DEPS "${fq_deps_list}" - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" ) endif() + if(LLVM_LIBC_ENABLE_LINTING AND TARGET ${internal_target_name}) if(NOT LLVM_LIBC_CLANG_TIDY) message(FATAL_ERROR "Something is wrong! LLVM_LIBC_ENABLE_LINTING is " diff --git a/libc/startup/gpu/amdgpu/CMakeLists.txt b/libc/startup/gpu/amdgpu/CMakeLists.txt index 8de0955..c2ae950 100644 --- a/libc/startup/gpu/amdgpu/CMakeLists.txt +++ b/libc/startup/gpu/amdgpu/CMakeLists.txt @@ -10,6 +10,7 @@ add_startup_object( -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -emit-llvm # AMDGPU's intermediate object file format is bitcode. --target=${LIBC_GPU_TARGET_TRIPLE} + NO_GPU_BUNDLE # Compile this file directly without special GPU handling. ) get_fq_target_name(crt1 fq_name) diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt index 2057fcf..f7f58ec 100644 --- a/libc/startup/gpu/nvptx/CMakeLists.txt +++ b/libc/startup/gpu/nvptx/CMakeLists.txt @@ -10,6 +10,7 @@ add_startup_object( -x cuda # Use the CUDA toolchain to emit the `_start` kernel. --offload-device-only --offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE} + NO_GPU_BUNDLE # Compile this file directly without special GPU handling. ) get_fq_target_name(crt1 fq_name) -- 2.7.4