[HIP] Fix -fgpu-rdc for Windows
author Yaxun (Sam) Liu <yaxun.liu@amd.com>
Thu, 2 Dec 2021 22:49:36 +0000 (17:49 -0500)
committer Yaxun (Sam) Liu <yaxun.liu@amd.com>
Mon, 6 Dec 2021 21:42:23 +0000 (16:42 -0500)
This patch fixes issues with -fgpu-rdc for the Windows MSVC
toolchain:

Fix COFF-specific section flags and remove section types
in the llvm-mc input file for Windows.

Escape fatbin path in llvm-mc input file.

Add -triple option to llvm-mc.

Put __hip_gpubin_handle in comdat when it has linkonce
linkage.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D115039

clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/Driver/ToolChains/HIP.cpp
clang/test/CodeGenCUDA/device-stub.cu
clang/test/Driver/hip-toolchain-rdc.hip

index a1b4431..c4e3f7f 100644 (file)
@@ -814,6 +814,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
         Linkage,
         /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
         "__hip_gpubin_handle");
+    if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage)
+      GpuBinaryHandle->setComdat(
+          CGM.getModule().getOrInsertComdat(GpuBinaryHandle->getName()));
     GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
     // Prevent the weak symbol in different shared libraries being merged.
     if (Linkage != llvm::GlobalValue::InternalLinkage)
index 07af1a0..097cfaa 100644 (file)
@@ -183,8 +183,7 @@ void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
     const InputInfoList &Inputs, const ArgList &Args,
     const JobAction &JA) const {
   const ToolChain &TC = getToolChain();
-  std::string Name =
-      std::string(llvm::sys::path::stem(Output.getFilename()));
+  std::string Name = std::string(llvm::sys::path::stem(Output.getFilename()));
 
   // Create Temp Object File Generator,
   // Offload Bundled file and Bundled Object file.
@@ -206,20 +205,29 @@ void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
   std::string ObjBuffer;
   llvm::raw_string_ostream ObjStream(ObjBuffer);
 
+  auto HostTriple =
+      C.getSingleOffloadToolChain<Action::OFK_Host>()->getTriple();
+
   // Add MC directives to embed target binaries. We ensure that each
   // section and image is 16-byte aligned. This is not mandatory, but
   // increases the likelihood of data to be aligned with a cache block
   // in several main host machines.
   ObjStream << "#       HIP Object Generator\n";
   ObjStream << "# *** Automatically generated by Clang ***\n";
-  ObjStream << "  .protected __hip_fatbin\n";
-  ObjStream << "  .type __hip_fatbin,@object\n";
-  ObjStream << "  .section .hip_fatbin,\"a\",@progbits\n";
+  if (HostTriple.isWindowsMSVCEnvironment()) {
+    ObjStream << "  .section .hip_fatbin, \"dw\"\n";
+  } else {
+    ObjStream << "  .protected __hip_fatbin\n";
+    ObjStream << "  .type __hip_fatbin,@object\n";
+    ObjStream << "  .section .hip_fatbin,\"a\",@progbits\n";
+  }
   ObjStream << "  .globl __hip_fatbin\n";
   ObjStream << "  .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign))
             << "\n";
   ObjStream << "__hip_fatbin:\n";
-  ObjStream << "  .incbin \"" << BundleFile << "\"\n";
+  ObjStream << "  .incbin ";
+  llvm::sys::printArg(ObjStream, BundleFile, /*Quote=*/true);
+  ObjStream << "\n";
   ObjStream.flush();
 
   // Dump the contents of the temp object file gen if the user requested that.
@@ -238,7 +246,8 @@ void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
 
   Objf << ObjBuffer;
 
-  ArgStringList McArgs{"-o",      Output.getFilename(),
+  ArgStringList McArgs{"-triple", Args.MakeArgString(HostTriple.normalize()),
+                       "-o",      Output.getFilename(),
                        McinFile,  "--filetype=obj"};
   const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
   C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
index 0de58e1..462faad 100644 (file)
 // RUN:     -fcuda-include-gpubinary %t -o - -x hip\
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN
 
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -aux-triple amdgcn -emit-llvm %s \
+// RUN:     -o - -x hip\
+// RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN,HIP,HIPNEF
+
 #include "Inputs/cuda.h"
 
+// HIPNEF: $__hip_gpubin_handle = comdat any
+
 #ifndef NOGLOBALS
 // NORDC-DAG: @device_var = internal global i32
 // RDC-DAG: @device_var = global i32
index abcc145..7d06e39 100644 (file)
 // RUN:   -fhip-dump-offload-linker-script \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LNX %s
+
+// RUN: %clang -### -target x86_64-pc-windows-msvc \
+// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
+// RUN:   --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \
+// RUN:   -fuse-ld=lld -fgpu-rdc -nogpuinc \
+// RUN:   -fhip-dump-offload-linker-script \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,MSVC %s
 
 // check code object alignment in dumped llvm-mc input
-// CHECK: .protected __hip_fatbin
+// LNX: .protected __hip_fatbin
+// LNX: .type __hip_fatbin,@object
+// LNX: .section .hip_fatbin,"a",@progbits
+// MSVC: .section .hip_fatbin, "dw"
+// CHECK: .globl __hip_fatbin
 // CHECK: .p2align 12
+// CHECK: __hip_fatbin:
+// CHECK: .incbin "[[BUNDLE:.*hipfb]]"
 
 // emit objects for host side path
-// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" [[HOST:"x86_64-[^"]+"]]
 // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-emit-obj"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
@@ -26,7 +43,7 @@
 // CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]
 
-// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CHECK: [[CLANG]] "-cc1" "-triple" [[HOST]]
 // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-emit-obj"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
@@ -36,7 +53,7 @@
 
 // generate image for device side path on gfx803
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" [[HOST:"x86_64-[^"]+"]]
 // CHECK-SAME: "-emit-llvm-bc"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
@@ -48,7 +65,7 @@
 // CHECK-SAME: {{.*}} [[A_SRC]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" [[HOST]]
 // CHECK-SAME: "-emit-llvm-bc"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
 // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
@@ -68,7 +85,7 @@
 
 // generate image for device side path on gfx900
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" [[HOST]]
 // CHECK-SAME: "-emit-llvm-bc"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device"
@@ -78,7 +95,7 @@
 // CHECK-SAME: {{.*}} [[A_SRC]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" [[HOST]]
 // CHECK-SAME: "-emit-llvm-bc"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
 // CHECK-SAME: "-bundle-align=4096"
 // CHECK-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900"
-// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]"
+// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE]]"
 
-// CHECK: [[MC:".*llvm-mc.*"]] "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin" "--filetype=obj"
+// CHECK: [[MC:".*llvm-mc.*"]] "-triple" [[HOST]] "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin" "--filetype=obj"
 
 // output the executable
-// CHECK: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
+// LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
+// MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]