[AMDGPUHSAMetadataStreamer] Do not assume ABI alignment for pointers
author Nikita Popov <npopov@redhat.com>
Wed, 26 Jan 2022 09:53:21 +0000 (10:53 +0100)
committer Nikita Popov <npopov@redhat.com>
Wed, 26 Jan 2022 14:45:14 +0000 (15:45 +0100)
AMDGPUHSAMetadataStreamer currently assumes that pointer arguments
without align attribute have ABI alignment of the pointee type.
This is incompatible with opaque pointers, but also plain incorrect:
Pointer arguments without explicit alignment have alignment 1. It is
the responsibility of the frontend to add correct align annotations.

Differential Revision: https://reviews.llvm.org/D118229

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll

index f4d4d34..3ac7c45 100644 (file)
@@ -331,8 +331,7 @@ void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
   if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
       // FIXME: Should report this for all address spaces
-      PointeeAlign = DL.getValueOrABITypeAlignment(
-          Arg.getParamAlign(), PtrTy->getPointerElementType());
+      PointeeAlign = Arg.getParamAlign().valueOrOne();
     }
   }
 
@@ -731,10 +730,8 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
 
   // FIXME: Need to distinguish in memory alignment from pointer alignment.
   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
-    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
-      PointeeAlign = DL.getValueOrABITypeAlignment(
-          Arg.getParamAlign(), PtrTy->getPointerElementType());
-    }
+    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
+      PointeeAlign = Arg.getParamAlign().valueOrOne();
   }
 
   // There's no distinction between byval aggregates and raw aggregates.
index 45c8282..e219652 100644 (file)
@@ -881,7 +881,7 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0
 ; CHECK:          .symbol:         test_addr_space.kd
 define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
                                            i32 addrspace(4)* %c,
-                                           i32 addrspace(3)* %l) #0
+                                           i32 addrspace(3)* align 4 %l) #0
     !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
     !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
   ret void
@@ -1679,11 +1679,11 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type(
 ; CHECK:          .symbol:         test_pointee_align.kd
 define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
                                               i8 addrspace(3)* %b,
-                                              <2 x i8> addrspace(3)* %c,
-                                              <3 x i8> addrspace(3)* %d,
-                                              <4 x i8> addrspace(3)* %e,
-                                              <8 x i8> addrspace(3)* %f,
-                                              <16 x i8> addrspace(3)* %g,
+                                              <2 x i8> addrspace(3)* align 2 %c,
+                                              <3 x i8> addrspace(3)* align 4 %d,
+                                              <4 x i8> addrspace(3)* align 4 %e,
+                                              <8 x i8> addrspace(3)* align 8 %f,
+                                              <16 x i8> addrspace(3)* align 16 %g,
                                               {} addrspace(3)* %h) #0
     !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
     !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
index f19fbc1..f44681a 100644 (file)
@@ -873,7 +873,7 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0
 ; CHECK-NEXT:       AddrSpaceQual: Global
 define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
                                            i32 addrspace(4)* %c,
-                                           i32 addrspace(3)* %l) #0
+                                           i32 addrspace(3)* align 4 %l) #0
     !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
     !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
   ret void
@@ -1653,11 +1653,11 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type(
 ; CHECK-NEXT:       AddrSpaceQual: Global
 define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
                                               i8 addrspace(3)* %b,
-                                              <2 x i8> addrspace(3)* %c,
-                                              <3 x i8> addrspace(3)* %d,
-                                              <4 x i8> addrspace(3)* %e,
-                                              <8 x i8> addrspace(3)* %f,
-                                              <16 x i8> addrspace(3)* %g,
+                                              <2 x i8> addrspace(3)* align 2 %c,
+                                              <3 x i8> addrspace(3)* align 4 %d,
+                                              <4 x i8> addrspace(3)* align 4 %e,
+                                              <8 x i8> addrspace(3)* align 8 %f,
+                                              <16 x i8> addrspace(3)* align 16 %g,
                                               {} addrspace(3)* %h) #0
     !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
     !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {