From 9fa46200ea2f77cb3db2590f4268a8fdbc3882c5 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Tue, 13 Dec 2022 10:20:42 -0500 Subject: [PATCH] [AMDGPU] Add `.workgroup_processor_mode` to v5 MD Adds Workgroup Processor Mode (WGP) to the HSA Metadata for Code Object v5/GFX10+. The field is already present as an asm directive and in the compute program resource register but is also needed in the MD. Reviewed By: kzhuravl Differential Revision: https://reviews.llvm.org/D139931 --- llvm/docs/AMDGPUUsage.rst | 14 ++++++++------ llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp | 2 ++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 3 +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 ++ .../AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll | 14 ++++++++++++++ 5 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index eac0883..3030b14 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -3571,12 +3571,14 @@ Code object V5 metadata is the same as .. table:: AMDHSA Code Object V5 Kernel Metadata Map Additions :name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v5 - ===================== ============= ========== ======================================= - String Key Value Type Required? Description - ===================== ============= ========== ======================================= - ".uses_dynamic_stack" boolean Indicates if the generated machine code - is using a dynamically sized stack. - ===================== ============= ========== ======================================= + ============================= ============= ========== ======================================= + String Key Value Type Required? 
Description + ============================= ============= ========== ======================================= + ".uses_dynamic_stack" boolean Indicates if the generated machine code + is using a dynamically sized stack. + ".workgroup_processor_mode" integer (GFX10+) Controls ENABLE_WGP_MODE in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ============================= ============= ========== ======================================= .. diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp index b465966..64d2c97 100644 --- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -262,6 +262,8 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) { if (!verifyScalarEntry(KernelMap, ".uses_dynamic_stack", false, msgpack::Type::Boolean)) return false; + if (!verifyIntegerEntry(KernelMap, ".workgroup_processor_mode", false)) + return false; if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true)) return false; if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 7a3446a..2c578cc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -877,6 +877,9 @@ msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) Kern[".uses_dynamic_stack"] = Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); + if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5 && STM.supportsWGP()) + Kern[".workgroup_processor_mode"] = + Kern.getDocument()->getNode(ProgramInfo.WgpMode); // FIXME: The metadata treats the minimum as 16? 
Kern[".kernarg_segment_align"] = diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 1d035a2..5ebec83 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -298,6 +298,8 @@ public: /// the original value. bool zeroesHigh16BitsOfDest(unsigned Opcode) const; + bool supportsWGP() const { return getGeneration() >= GFX10; } + bool hasIntClamp() const { return HasIntClamp; } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll new file mode 100644 index 0000000..d1d5e232 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10-CU %s + +; GFX10: .amdhsa_workgroup_processor_mode 0 +; GFX10: .workgroup_processor_mode: 0 +; GFX10-CU: .amdhsa_workgroup_processor_mode 1 +; GFX10-CU: .workgroup_processor_mode: 1 + +define amdgpu_kernel void @wavefrontsize() { +entry: + ret void +} -- 2.7.4