From: Yaxun (Sam) Liu Date: Fri, 12 Feb 2021 15:21:27 +0000 (-0500) Subject: Relands "[HIP] Change default --gpu-max-threads-per-block value to 1024" X-Git-Tag: llvmorg-14-init~15274 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=053e61d54e63810b005adacf5e73a6465ca24bd2;p=platform%2Fupstream%2Fllvm.git Relands "[HIP] Change default --gpu-max-threads-per-block value to 1024" This reverts commit e384e94fbe7c1d5c89fcdde33ffda04e9802c2ce. --- diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 1a9f804..e244089 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -242,7 +242,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP") -LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP") +LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP") LANGOPT(GPUDeferDiag, 1, 0, "defer host/device related diagnostic messages for CUDA/HIP") LANGOPT(GPUExcludeWrongSideOverloads, 1, 0, "always exclude wrong side overloads in overloading resolution for CUDA/HIP") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bcb9916..5228396 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -925,7 +925,7 @@ defm gpu_exclude_wrong_side_overloads : BoolFOption<"gpu-exclude-wrong-side-over def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">, Flags<[CC1Option]>, HelpText<"Default max threads per block for kernel launch bounds for HIP">, - MarshallingInfoStringInt, "256">, + MarshallingInfoStringInt, "1024">, ShouldParseIf; def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">, HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing " diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index bcd2429..d425412 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -8998,9 +8998,13 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( assert(Max == 0 && "Max must be zero"); } else if (IsOpenCLKernel || IsHIPKernel) { // By default, restrict the maximum size to a value specified by - // --gpu-max-threads-per-block=n or its default value. + // --gpu-max-threads-per-block=n or its default value for HIP. + const unsigned OpenCLDefaultMaxWorkGroupSize = 256; + const unsigned DefaultMaxWorkGroupSize = + IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize + : M.getLangOpts().GPUMaxThreadsPerBlock; std::string AttrVal = - std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock); + std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize); F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu index a1d62e3..3e602b1 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -39,7 +39,7 @@ __global__ void num_vgpr_64() { // NAMD-NOT: "amdgpu-num-vgpr" // NAMD-NOT: "amdgpu-num-sgpr" -// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" +// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true" // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64" // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2" diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu index 5f48a9e..48473b9 100644 --- a/clang/test/CodeGenCUDA/kernel-amdgcn.cu +++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu @@ -39,4 +39,4 @@ int main() { launch((void*)D.Empty()); return 0; } -// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256" +// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"