[amdgpu][lds] Use a consistent order of fields in generated structs

author Jon Chesterfield <jonathanchesterfield@gmail.com>

Wed, 9 Nov 2022 15:57:38 +0000 (15:57 +0000)

committer Jon Chesterfield <jonathanchesterfield@gmail.com>

Wed, 9 Nov 2022 15:57:41 +0000 (15:57 +0000)
author Jon Chesterfield <jonathanchesterfield@gmail.com>
Wed, 9 Nov 2022 15:57:38 +0000 (15:57 +0000)
committer Jon Chesterfield <jonathanchesterfield@gmail.com>
Wed, 9 Nov 2022 15:57:41 +0000 (15:57 +0000)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

index de56ab7..be5ac67 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -390,10 +390,22 @@ private:
  
      SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
      LayoutFields.reserve(LDSVarsToTransform.size());
-    for (GlobalVariable *GV : LDSVarsToTransform) {
-      OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()),
-                                   AMDGPU::getAlign(DL, GV));
-      LayoutFields.emplace_back(F);
+    {
+      // The order of fields in this struct depends on the order of
+      // varables in the argument which varies when changing how they
+      // are identified, leading to spurious test breakage.
+      std::vector<GlobalVariable *> Sorted(LDSVarsToTransform.begin(),
+                                           LDSVarsToTransform.end());
+      llvm::sort(Sorted.begin(), Sorted.end(),
+                 [](const GlobalVariable *lhs, const GlobalVariable *rhs) {
+                   return lhs->getName() < rhs->getName();
+                 });
+      for (GlobalVariable *GV : Sorted) {
+        OptimizedStructLayoutField F(GV,
+                                     DL.getTypeAllocSize(GV->getValueType()),
+                                     AMDGPU::getAlign(DL, GV));
+        LayoutFields.emplace_back(F);
+      }
      }
  
      performOptimizedStructLayout(LayoutFields);
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll

index cffaf0d..ebe9cca 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll
@@ -3,18 +3,18 @@
  
  ; CHECK: %llvm.amdgcn.module.lds.t = type { float, float }
  
-@func = addrspace(3) global float undef, align 4
+@a_func = addrspace(3) global float undef, align 4
  
  ; CHECK: %llvm.amdgcn.kernel.timestwo.lds.t = type { float }
  
  @kern = addrspace(3) global float undef, align 4
  
-; @func is only used from a non-kernel function so is rewritten
-; CHECK-NOT: @func
-; @both is used from a non-kernel function so is rewritten
-; CHECK-NOT: @both
-; sorted both < func, so @both at null and @func at 4
-@both = addrspace(3) global float undef, align 4
+; @a_func is only used from a non-kernel function so is rewritten
+; CHECK-NOT: @a_func
+; @b_both is used from a non-kernel function so is rewritten
+; CHECK-NOT: @b_both
+; sorted both < func, so @b_both at null and @a_func at 4
+@b_both = addrspace(3) global float undef, align 4
  
  ; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 4
  ; CHECK: @llvm.amdgcn.kernel.timestwo.lds = internal addrspace(3) global %llvm.amdgcn.kernel.timestwo.lds.t undef, align 4
@@ -32,7 +32,7 @@
  ; CHECK:       ret i32 %8
  define i32 @get_func() local_unnamed_addr #0 {
  entry:
-  %0 = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @func to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @func to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+  %0 = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @a_func to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @a_func to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
    ret i32 %0
  }
  
@@ -49,7 +49,7 @@ entry:
  ; CHECK:      ret void
  define void @set_func(i32 %x) local_unnamed_addr #1 {
  entry:
-  store i32 %x, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+  store i32 %x, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
    ret void
  }
  
@@ -78,9 +78,9 @@ entry:
  ; CHECK:      store i32 %mul, i32* %16, align 4
  ; CHECK:      ret void
  define amdgpu_kernel void @timestwo() {
-  %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+  %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
    %mul = mul i32 %ld, 2
-  store i32 %mul, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+  store i32 %mul, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
    ret void
  }
author	Jon Chesterfield <jonathanchesterfield@gmail.com>
	Wed, 9 Nov 2022 15:57:38 +0000 (15:57 +0000)
committer	Jon Chesterfield <jonathanchesterfield@gmail.com>
	Wed, 9 Nov 2022 15:57:41 +0000 (15:57 +0000)
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll		patch \| blob \| history