From 5e25284dbc947781fcd3f2230bea14c48f43ec50 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 21 Sep 2022 00:32:57 -0400 Subject: [PATCH] [AMDGPU] Emit module flag for all code object versions Reviewed by: Changpeng Fang, Matt Arsenault, Brian Sumner Differential Revision: https://reviews.llvm.org/D134355 --- clang/lib/CodeGen/CodeGenModule.cpp | 5 ++--- clang/test/CodeGenCUDA/amdgpu-code-object-version.cu | 11 +++++++---- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu | 10 +++++----- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 06ad67a..8feb673 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -583,9 +583,8 @@ void CodeGenModule::Release() { } // Emit amdgpu_code_object_version module flag, which is code object version // times 100. - // ToDo: Enable module flag for all code object version when ROCm device - // library is ready. - if (getTarget().getTargetOpts().CodeObjectVersion == TargetOptions::COV_5) { + if (getTarget().getTargetOpts().CodeObjectVersion != + TargetOptions::COV_None) { getModule().addModuleFlag(llvm::Module::Error, "amdgpu_code_object_version", getTarget().getTargetOpts().CodeObjectVersion); diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index e828fd9..16505b3 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,16 +1,16 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=NONE +// RUN: -o - %s | FileCheck %s -check-prefix=V4 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=NONE %s +// RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -mcode-object-version=3 -o - %s | FileCheck -check-prefix=NONE %s +// RUN: -mcode-object-version=3 -o - %s | FileCheck -check-prefix=V3 %s // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=NONE %s +// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=5 -o - %s | FileCheck -check-prefix=V5 %s @@ -21,6 +21,9 @@ // RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4.1 -o - %s 2>&1| FileCheck %s -check-prefix=INV +// V2: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 200} +// V3: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 300} +// V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400} // V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500} // NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", // INV: error: invalid value '4.1' in '-mcode-object-version=4.1' diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index 7ce7fa5..4694d7e 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -18,7 +18,7 @@ // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce) // CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* // CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* -// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2 +// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber ![[MD:[0-9]+]] // OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1 // OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4 // OPT: ret void @@ -30,7 +30,7 @@ __global__ void kernel1(int *x) { // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce) // CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* // CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* -// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2 +// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber ![[MD]] // OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1 // OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4 // OPT: ret void @@ -68,7 +68,7 @@ struct S { // OPT: [[R1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1 // OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8 // OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)* -// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2 +// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[G0]], align 4, !amdgpu.noclobber ![[MD]] // OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1 // OPT: store i32 [[INC]], i32 addrspace(1)* [[G0]], align 4 // OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4 @@ -103,7 +103,7 @@ struct T { // OPT: [[R1:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 1 // OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8 // OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)* -// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2 +// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[G0]], align 4, !amdgpu.noclobber ![[MD]] // OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00 // OPT: store float [[ADD0]], float addrspace(1)* [[G0]], align 4 // OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4 @@ -130,7 +130,7 @@ struct SS { // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float addrspace(1)*{{.*}} %a.coerce) // CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* // CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* -// OPT: [[VAL:%.*]] = load float, float addrspace(1)* %a.coerce, align 4, !amdgpu.noclobber !2 +// OPT: [[VAL:%.*]] = load float, float addrspace(1)* %a.coerce, align 4, !amdgpu.noclobber ![[MD]] // OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00 // OPT: store float [[INC]], float addrspace(1)* %a.coerce, align 4 // OPT: ret void -- 2.7.4