[HIP] Add option -fgpu-inline-threshold

author Yaxun (Sam) Liu <yaxun.liu@amd.com>
Wed, 21 Apr 2021 18:34:37 +0000 (14:34 -0400)
committer Yaxun (Sam) Liu <yaxun.liu@amd.com>
Wed, 21 Apr 2021 21:18:18 +0000 (17:18 -0400)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td

index 648cfb0..d17486b 100644 (file)
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -956,6 +956,9 @@ def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">,
    HelpText<"Default max threads per block for kernel launch bounds for HIP">,
    MarshallingInfoInt<LangOpts<"GPUMaxThreadsPerBlock">, "1024">,
    ShouldParseIf<hip.KeyPath>;
+def fgpu_inline_threshold_EQ : Joined<["-"], "fgpu-inline-threshold=">,
+  Flags<[HelpHidden]>,
+  HelpText<"Inline threshold for device compilation for CUDA/HIP">;
  def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">,
    HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing "
    "__cyg_profile_func_enter and __cyg_profile_func_exit">;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp

index 95ca995..ddfdda8 100644 (file)
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6473,6 +6473,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
    if (IsHIP)
      CmdArgs.push_back("-fcuda-allow-variadic-functions");
  
+  if (IsCudaDevice || IsHIPDevice) {
+    StringRef InlineThresh =
+        Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ);
+    if (!InlineThresh.empty()) {
+      std::string ArgStr =
+          std::string("-inline-threshold=") + InlineThresh.str();
+      CmdArgs.append({"-mllvm", Args.MakeArgStringRef(ArgStr)});
+    }
+  }
+
    // OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
    // to specify the result of the compile phase on the host, so the meaningful
    // device declarations can be identified. Also, -fopenmp-is-device is passed
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip

index 99e9f0c..b592a9c 100644 (file)
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -51,3 +51,8 @@
  // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=CTA %s
  // CTA: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases"
  // CTA-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases"
+
+// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN:   --offload-arch=gfx906 -fgpu-inline-threshold=1000 %s 2>&1 | FileCheck -check-prefix=THRESH %s
+// THRESH: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mllvm" "-inline-threshold=1000"
+// THRESH-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-inline-threshold=1000"
author	Yaxun (Sam) Liu <yaxun.liu@amd.com>
	Wed, 21 Apr 2021 18:34:37 +0000 (14:34 -0400)
committer	Yaxun (Sam) Liu <yaxun.liu@amd.com>
	Wed, 21 Apr 2021 21:18:18 +0000 (17:18 -0400)
clang/include/clang/Driver/Options.td		patch | blob | history
clang/lib/Driver/ToolChains/Clang.cpp		patch | blob | history
clang/test/Driver/hip-options.hip		patch | blob | history