Summary:
We offer almost no support for offloading on 32-bit systems. This causes
problems when cross-compiling for 32-bit machines, because CMake finds
the host's CUDA and HSA runtime libraries, which are incompatible with
the 32-bit target. Instead, when LLVM_BUILD_32_BITS is set, we force the
`amdgpu-arch` and `nvptx-arch` tools to always use the dynamically
loaded version, which should always compile.
# If we find the HSA runtime we link with it directly.
find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
-if (${hsa-runtime64_FOUND})
+if (hsa-runtime64_FOUND AND NOT LLVM_BUILD_32_BITS)
set_target_properties(amdgpu-arch PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON)
clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
else()
find_package(CUDAToolkit QUIET)
# If we found the CUDA library directly we just dynamically link against it.
-if (CUDAToolkit_FOUND)
+if (CUDAToolkit_FOUND AND NOT LLVM_BUILD_32_BITS)
target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver)
else()
target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")