[amdgpu][lds] Remove recalculation of LDS frame from backend

author Jon Chesterfield <jonathanchesterfield@gmail.com>

Thu, 13 Jul 2023 22:54:37 +0000 (23:54 +0100)

committer Jon Chesterfield <jonathanchesterfield@gmail.com>

Thu, 13 Jul 2023 22:54:38 +0000 (23:54 +0100)
author Jon Chesterfield <jonathanchesterfield@gmail.com>
Thu, 13 Jul 2023 22:54:37 +0000 (23:54 +0100)
committer Jon Chesterfield <jonathanchesterfield@gmail.com>
Thu, 13 Jul 2023 22:54:38 +0000 (23:54 +0100)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst

index dbe6e69a3b3975a39b5d36e75958a4e1e3356bc1..dfe64fb471fdaec1c0eaac6ae58291bcbc18c842 100644 (file)
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1090,6 +1090,12 @@ The AMDGPU backend supports the following LLVM IR attributes.
                                               kernel argument that holds the completion action pointer. If this
                                               attribute is absent, then the amdgpu-no-implicitarg-ptr is also removed.
  
+     "amdgpu-lds-size"                       The number of bytes that will be allocated in the Local Data Store at
+                                             address zero. Variables are allocated within this frame using absolute
+                                             symbol metadata, primarily by the AMDGPULowerModuleLDS pass. This is an
+                                             internal detail of how LDS variables are lowered; language front ends
+                                             should not set this.
+
       ======================================= ==========================================================
  
  Calling Conventions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

index 6d79f27c263f02f29db073bdade682146b90c36f..9ba5ea8fb73f04c1725ca1ff3f7634db5b92b2bf 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -512,8 +512,6 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    const SITargetLowering &TLI = *getTLI<SITargetLowering>();
    const DataLayout &DL = F.getParent()->getDataLayout();
  
-  Info->allocateKnownAddressLDSGlobal(F);
-
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
  
@@ -596,8 +594,6 @@ bool AMDGPUCallLowering::lowerFormalArguments(
    const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const DataLayout &DL = F.getParent()->getDataLayout();
  
-  Info->allocateKnownAddressLDSGlobal(F);
-
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
  
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

index 0c9a44bce502df524fae4eb8b65d51547973099b..0a1075c36acb0edd55405e9d245719de3fff06de 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -1106,6 +1106,8 @@ public:
      return KernelToCreatedDynamicLDS;
    }
  
+  // This attribute is no longer used by the backend. TODO: Delete it in favour
+  // of pass-local state and update the tests to remove the string.
    static bool canElideModuleLDS(const Function &F) {
      return F.hasFnAttribute("amdgpu-elide-module-lds");
    }
@@ -1211,7 +1213,6 @@ public:
  
      // All kernel frames have been allocated. Calculate and record the
      // addresses.
-
      {
        const DataLayout &DL = M.getDataLayout();
  
@@ -1220,8 +1221,8 @@ public:
            continue;
  
          // All three of these are optional. The first variable is allocated at
-        // zero. They are allocated by allocateKnownAddressLDSGlobal in the
-        // following order:
+        // zero. They are allocated by AMDGPUMachineFunction as one block.
+        // Layout:
          //{
          //  module.lds
          //  alignment padding
@@ -1250,22 +1251,23 @@ public:
  
          if (AllocateKernelScopeStruct) {
            GlobalVariable *KernelStruct = Replacement->second.SGV;
-
            Offset = alignTo(Offset, AMDGPU::getAlign(DL, KernelStruct));
-
            recordLDSAbsoluteAddress(&M, KernelStruct, Offset);
-
            Offset += DL.getTypeAllocSize(KernelStruct->getValueType());
-
          }
  
+        // If there is dynamic allocation, the alignment needed is included in
+        // the static frame size. There may be no reference to the dynamic
+        // variable in the kernel itself, so without including it here, that
+        // alignment padding could be missed.
          if (AllocateDynamicVariable) {
            GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];
-
            Offset = alignTo(Offset, AMDGPU::getAlign(DL, DynamicVariable));
-
            recordLDSAbsoluteAddress(&M, DynamicVariable, Offset);
          }
+
+        if (Offset != 0)
+          Func.addFnAttr("amdgpu-lds-size", std::to_string(Offset));
        }
      }
  
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp

index 5a12782de1a577fc085064941d239042eda88ee7..8f3bb62d6541f4433034e8abeda8ce9194d4781e 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -43,6 +43,12 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
    // Assume the attribute allocates before any known GDS globals.
    StaticGDSSize = GDSSize;
  
+  // Keeping LDSSize and StaticLDSSize as separate variables is only profitable
+  // when the LDS module lowering pass is disabled. If graphics does not use
+  // dynamic LDS, it is never profitable. Cleanup is left for a later change.
+  LDSSize = F.getFnAttributeAsParsedInteger("amdgpu-lds-size", 0);
+  StaticLDSSize = LDSSize;
+
    CallingConv::ID CC = F.getCallingConv();
    if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
      ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
@@ -65,6 +71,42 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
  
    unsigned Offset;
    if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+
+    std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
+    if (MaybeAbs) {
+      // Absolute address LDS variables that exist prior to the LDS lowering
+      // pass raise a fatal error in that pass. These failure modes are only
+      // reachable if that lowering pass is disabled or broken. If/when adding
+      // support for absolute addresses on user specified variables, the
+      // alignment check moves to the lowering pass and the frame calculation
+      // needs to take the user variables into consideration.
+
+      uint32_t ObjectStart = *MaybeAbs;
+
+      if (ObjectStart != alignTo(ObjectStart, Alignment)) {
+        report_fatal_error("Absolute address LDS variable inconsistent with "
+                           "variable alignment");
+      }
+
+      if (isModuleEntryFunction()) {
+        // If this is a module entry function, we can also sanity check against
+        // the static frame. Strictly it would be better to check against the
+        // attribute, i.e. that the variable is within the always-allocated
+        // section, and not within some other non-absolute-address object
+        // allocated here, but the extra error detection is minimal and we would
+        // have to pass the Function around or cache the attribute value.
+        uint32_t ObjectEnd =
+            ObjectStart + DL.getTypeAllocSize(GV.getValueType());
+        if (ObjectEnd > StaticLDSSize) {
+          report_fatal_error(
+              "Absolute address LDS variable outside of static frame");
+        }
+      }
+
+      Entry.first->second = ObjectStart;
+      return ObjectStart;
+    }
+
      /// TODO: We should sort these to minimize wasted space due to alignment
      /// padding. Currently the padding is decided by the first encountered use
      /// during lowering.
@@ -89,16 +131,6 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
    return Offset;
  }
  
-static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
-
-static const GlobalVariable *getKernelLDSGlobalFromFunction(const Function &F) {
-  const Module *M = F.getParent();
-  std::string KernelLDSName = "llvm.amdgcn.kernel.";
-  KernelLDSName += F.getName();
-  KernelLDSName += ".lds";
-  return M->getNamedGlobal(KernelLDSName);
-}
-
  static const GlobalVariable *
  getKernelDynLDSGlobalFromFunction(const Function &F) {
    const Module *M = F.getParent();
@@ -108,73 +140,6 @@ getKernelDynLDSGlobalFromFunction(const Function &F) {
    return M->getNamedGlobal(KernelDynLDSName);
  }
  
-// This kernel calls no functions that require the module lds struct
-static bool canElideModuleLDS(const Function &F) {
-  return F.hasFnAttribute("amdgpu-elide-module-lds");
-}
-
-void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
-  const Module *M = F.getParent();
-  // This function is called before allocating any other LDS so that it can
-  // reliably put values at known addresses. Consequently, dynamic LDS, if
-  // present, will not yet have been allocated
-
-  assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");
-
-  if (isModuleEntryFunction()) {
-
-    // Pointer values start from zero, memory allocated per-kernel-launch
-    // Variables can be grouped into a module level struct and a struct per
-    // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
-    // are allocated at statically computable addresses here.
-    //
-    // Address 0
-    // {
-    //   llvm.amdgcn.module.lds
-    // }
-    // alignment padding
-    // {
-    //   llvm.amdgcn.kernel.some-name.lds
-    // }
-    // other variables, e.g. dynamic lds, allocated after this call
-
-    const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
-    const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
-    const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
-
-    if (GV && !canElideModuleLDS(F)) {
-      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
-      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*GV);
-      if (!Expect || (Offset != *Expect)) {
-        report_fatal_error("Inconsistent metadata on module LDS variable");
-      }
-    }
-
-    if (KV) {
-      // The per-kernel offset is deterministic because it is allocated
-      // before any other non-module LDS variables.
-      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
-      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*KV);
-      if (!Expect || (Offset != *Expect)) {
-        report_fatal_error("Inconsistent metadata on kernel LDS variable");
-      }
-    }
-
-    if (Dyn) {
-      // The dynamic LDS is deterministic because the per-kernel one has the
-      // maximum alignment of any reachable and all remaining LDS variables,
-      // if this is present, are themselves dynamic LDS and will be allocated
-      // at the same address.
-      setDynLDSAlign(F, *Dyn);
-      unsigned Offset = LDSSize;
-      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
-      if (!Expect || (Offset != *Expect)) {
-        report_fatal_error("Inconsistent metadata on dynamic LDS variable");
-      }
-    }
-  }
-}
-
  std::optional<uint32_t>
  AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
    // TODO: Would be more consistent with the abs symbols to use a range
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h

index cdb937fe7b24a72de44809229dd58918d5cda9e6..5780fa64a7e4359c111f1f54927cffde8607b93a 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -104,8 +104,6 @@ public:
    unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV,
                               Align Trailing);
  
-  void allocateKnownAddressLDSGlobal(const Function &F);
-
    static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
    static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV);
  
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

index 29ec22f55f8036b8da2ef57f5f12944d29038a26..b429809f2e269c0c8f12a47a35d5673365d13e94 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2460,8 +2460,6 @@ SDValue SITargetLowering::LowerFormalArguments(
      return DAG.getEntryNode();
    }
  
-  Info->allocateKnownAddressLDSGlobal(Fn);
-
    SmallVector<ISD::InputArg, 16> Splits;
    SmallVector<CCValAssign, 16> ArgLocs;
    BitVector Skipped(Ins.size());
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll

index 0c47b439706bae58df7c6841c20beeeaf41d667a..af8142c3efbe878da78e58b20468cc0ef51a9dc6 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
@@ -22,7 +22,7 @@
  ; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 4, !absolute_symbol !0
  ;.
  define amdgpu_kernel void @k0() #0 {
-; CHECK-LABEL: @k0(
+; CHECK-LABEL: @k0() #0
  ; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4
  ; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9
  ; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11
@@ -40,7 +40,7 @@ define amdgpu_kernel void @k0() #0 {
  }
  
  define amdgpu_kernel void @k1() #0 {
-; CHECK-LABEL: @k1(
+; CHECK-LABEL: @k1() #1
  ; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17
  ; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21
  ; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23
@@ -56,7 +56,7 @@ define amdgpu_kernel void @k1() #0 {
  }
  
  define amdgpu_kernel void @k2() #0 {
-; CHECK-LABEL: @k2(
+; CHECK-LABEL: @k2() #2
  ; CHECK-NEXT:    store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, align 2
  ; CHECK-NEXT:    ret void
  ;
@@ -66,7 +66,7 @@ define amdgpu_kernel void @k2() #0 {
  }
  
  define amdgpu_kernel void @k3() #0 {
-; CHECK-LABEL: @k3(
+; CHECK-LABEL: @k3() #3
  ; CHECK-NEXT:    store i8 4, ptr addrspace(3) @llvm.amdgcn.kernel.k3.lds, align 4
  ; CHECK-NEXT:    ret void
  ;
@@ -75,14 +75,14 @@ define amdgpu_kernel void @k3() #0 {
    ret void
  }
  
-
+; CHECK-LABEL: @calls_f0() #4
  define amdgpu_kernel void @calls_f0() {
    call void @f0()
    ret void
  }
  
  define void @f0() {
-; CHECK-LABEL: define void @f0(
+; CHECK-LABEL: define void @f0()
  ; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24
  ; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24
  ; CHECK-NEXT: ret void
@@ -93,7 +93,10 @@ define void @f0() {
    ret void
  }
  
-attributes #0 = { "amdgpu-elide-module-lds" }
-; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }
+; CHECK: attributes #0 = { "amdgpu-elide-module-lds" "amdgpu-lds-size"="23" }
+; CHECK: attributes #1 = { "amdgpu-elide-module-lds" "amdgpu-lds-size"="22" }
+; CHECK: attributes #2 = { "amdgpu-elide-module-lds" "amdgpu-lds-size"="2" }
+; CHECK: attributes #3 = { "amdgpu-elide-module-lds" "amdgpu-lds-size"="4" }
+; CHECK: attributes #4 = { "amdgpu-lds-size"="9" }
  
  ; CHECK: !0 = !{i64 0, i64 1}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll

index 6ed7345e450e0e9e2321d4d0d8fe3d1f1f338ff7..4fcad258d4a74b767a69d8113f4ac396aac9b8e7 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll
@@ -9,7 +9,7 @@
  @B = external addrspace(3) global [0 x i32]
  
  define amdgpu_kernel void @kernel_0() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_0() !llvm.amdgcn.lds.kernel.id !1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_0() #0 !llvm.amdgcn.lds.kernel.id !1 {
  ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_0.lds) ]
  ; CHECK-NEXT:    call void @call_store_A()
  ; CHECK-NEXT:    ret void
@@ -29,7 +29,7 @@ define amdgpu_kernel void @kernel_1() {
  }
  
  define amdgpu_kernel void @kernel_2() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_2() !llvm.amdgcn.lds.kernel.id !3 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_2() #0 !llvm.amdgcn.lds.kernel.id !3 {
  ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_2.lds) ]
  ; CHECK-NEXT:    call void @store_A()
  ; CHECK-NEXT:    ret void
@@ -82,3 +82,5 @@ define private ptr @get_B_ptr() {
  ;
    ret ptr addrspacecast (ptr addrspace(3) @B to ptr)
  }
+
+; CHECK: attributes #0 = { "amdgpu-lds-size"="64" }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll

index 50155d1d240270ec8d1cf974599a7450c1968d35..9ef2957891620f48be723bfd377fe2d005c9d90b 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll
@@ -48,7 +48,7 @@ entry:
    ret void
  }
  
-; CHECK-LABEL: @timestwo() #0
+; CHECK-LABEL: @timestwo() #1
  ; CHECK-NOT: call void @llvm.donothing()
  
  ; CHECK:      %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
@@ -67,14 +67,14 @@ entry:
  ; CHECK:      %12 = inttoptr i64 %11 to ptr
  ; CHECK:      store i32 %mul, ptr %12, align 4
  ; CHECK:      ret void
-define amdgpu_kernel void @timestwo() {
+define amdgpu_kernel void @timestwo() #1 {
    %ld = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @kern to ptr) to i64)) to ptr), align 4
    %mul = mul i32 %ld, 2
    store i32 %mul, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @kern to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64)) to ptr), align 4
    ret void
  }
  
-; CHECK-LABEL: @through_functions()
+; CHECK-LABEL: @through_functions() #2
  define amdgpu_kernel void @through_functions() {
    %ld = call i32 @get_func()
    %mul = mul i32 %ld, 4
@@ -84,3 +84,5 @@ define amdgpu_kernel void @through_functions() {
  
  attributes #0 = { "amdgpu-elide-module-lds" }
  ; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }
+; CHECK: attributes #1 = { "amdgpu-elide-module-lds" "amdgpu-lds-size"="8" }
+; CHECK: attributes #2 = { "amdgpu-lds-size"="8" }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll

index d4f50e57970b90c75df9abe5721d706c2510c3a2..2789c672c592d3ef0556cf0b9eeea71f60542261 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll
@@ -131,7 +131,7 @@ define amdgpu_kernel void @expect_align2() {
  }
  
  define amdgpu_kernel void @expect_align4() {
-; CHECK-LABEL: @expect_align4() !llvm.amdgcn.lds.kernel.id !4 {
+; CHECK-LABEL: @expect_align4() #2 !llvm.amdgcn.lds.kernel.id !4 {
  ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds) ]
  ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
  ; CHECK-NEXT:    call void @use_shared4()
@@ -158,7 +158,7 @@ define amdgpu_kernel void @expect_align8() {
  
  ; Note: use_shared4 uses module.lds so this will allocate at offset 4
  define amdgpu_kernel void @expect_max_of_2_and_4() {
-; CHECK-LABEL: @expect_max_of_2_and_4() !llvm.amdgcn.lds.kernel.id !6 {
+; CHECK-LABEL: @expect_max_of_2_and_4() #2 !llvm.amdgcn.lds.kernel.id !6 {
  ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds) ]
  ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
  ; CHECK-NEXT:    call void @use_shared2()
@@ -174,15 +174,16 @@ define amdgpu_kernel void @expect_max_of_2_and_4() {
  attributes #0 = { noinline }
  
  ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
-; CHECK: declare void @llvm.donothing() #2
+; CHECK: declare void @llvm.donothing() #3
  
  ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-; CHECK: declare i32 @llvm.amdgcn.lds.kernel.id() #3
+; CHECK: declare i32 @llvm.amdgcn.lds.kernel.id() #4
  
  ; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }
  ; CHECK: attributes #1 = { noinline }
-; CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #2 = { "amdgpu-lds-size"="4" }
+; CHECK: attributes #3 = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
  
  ; CHECK: !0 = !{i64 0, i64 1}
  ; CHECK: !1 = !{i64 4, i64 5}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll

index 0844b3801fd5e655f6b1e9d3101255f3f452b18e..11dfa16d41b5d8dc6ceafb24fe0b3521f647f146 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
@@ -284,9 +284,12 @@ define amdgpu_kernel void @k123() {
  !2 = !{i32 1}
  
  
-; OPT: attributes #0 = { "amdgpu-elide-module-lds" }
-; OPT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; OPT: attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; OPT: attributes #0 = { "amdgpu-elide-module-lds" "amdgpu-lds-size"="8" }
+; OPT: attributes #1 = { "amdgpu-lds-size"="8" }
+; OPT: attributes #2 = { "amdgpu-elide-module-lds" "amdgpu-lds-size"="12" }
+; OPT: attributes #3 = { "amdgpu-lds-size"="20" }
+; OPT: attributes #4 = { nocallback nofree nosync nounwind willreturn memory(none) }
+; OPT: attributes #5 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
  
  ; OPT: !0 = !{i64 0, i64 1}
  ; OPT: !1 = !{i64 4, i64 5}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll

index 930b588bcde0935815fe05a8fffc6e502ec20632..306b04dcf30dd5d585cf8b6c2b2e07b4864cfe3e 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
@@ -195,7 +195,7 @@ define void @f3() {
  
  ; Doesn't access any via a function, won't be in the lookup table
  define amdgpu_kernel void @kernel_no_table() {
-; OPT-LABEL: @kernel_no_table() {
+; OPT-LABEL: @kernel_no_table() #0 {
  ; OPT-NEXT:    [[LD:%.*]] = load i64, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
  ; OPT-NEXT:    [[MUL:%.*]] = mul i64 [[LD]], 8
  ; OPT-NEXT:    store i64 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
@@ -218,7 +218,7 @@ define amdgpu_kernel void @kernel_no_table() {
  
  ; Access two variables, will allocate those two
  define amdgpu_kernel void @k01() {
-; OPT-LABEL: @k01() !llvm.amdgcn.lds.kernel.id !1 {
+; OPT-LABEL: @k01() #0 !llvm.amdgcn.lds.kernel.id !1 {
  ; OPT-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ]
  ; OPT-NEXT:    call void @f0()
  ; OPT-NEXT:    call void @f1()
@@ -256,7 +256,7 @@ define amdgpu_kernel void @k01() {
  }
  
  define amdgpu_kernel void @k23() {
-; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !7 {
+; OPT-LABEL: @k23() #1 !llvm.amdgcn.lds.kernel.id !7 {
  ; OPT-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ]
  ; OPT-NEXT:    call void @f2()
  ; OPT-NEXT:    call void @f3()
@@ -295,7 +295,7 @@ define amdgpu_kernel void @k23() {
  
  ; Access and allocate three variables
  define amdgpu_kernel void @k123() {
-; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !13 {
+; OPT-LABEL: @k123() #2 !llvm.amdgcn.lds.kernel.id !13 {
  ; OPT-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
  ; OPT-NEXT:    call void @f1()
  ; OPT-NEXT:    [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !20, !noalias !21
@@ -346,6 +346,10 @@ define amdgpu_kernel void @k123() {
  
  ; OPT: declare i32 @llvm.amdgcn.lds.kernel.id()
  
+; OPT: attributes #0 = { "amdgpu-lds-size"="8" }
+; OPT: attributes #1 = { "amdgpu-lds-size"="12" }
+; OPT: attributes #2 = { "amdgpu-lds-size"="16" }
+
  !0 = !{i64 0, i64 1}
  !1 = !{i32 0}
  !2 = !{i32 2}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds.ll

index 47688b71bdc5cc9d2fd02c024658299a0833d179..87ba74312735101abc0e00e9dd46a0c760e155d8 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds.ll
@@ -40,7 +40,7 @@ define void @func() {
  }
  
  ; This kernel calls a function that uses LDS so needs the block
-; CHECK-LABEL: @kern_call()
+; CHECK-LABEL: @kern_call() #0
  ; CHECK: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
  ; CHECK: call void @func()
  ; CHECK: %dec = atomicrmw fsub ptr addrspace(3) @llvm.amdgcn.module.lds, float 2.000000e+00 monotonic, align 8
@@ -51,7 +51,7 @@ define amdgpu_kernel void @kern_call() {
  }
  
  ; This kernel does alloc the LDS block as it makes no calls
-; CHECK-LABEL: @kern_empty()
+; CHECK-LABEL: @kern_empty() #1
  ; CHECK-NOT: call void @llvm.donothing()
  define spir_kernel void @kern_empty() #0{
    ret void
@@ -62,4 +62,6 @@ define spir_kernel void @kern_empty() #0{
  declare amdgpu_kernel void @kernel_declaration()
  
  attributes #0 = { "amdgpu-elide-module-lds" }
-; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }
+
+; CHECK: attributes #0 = { "amdgpu-lds-size"="12" }
+; CHECK: attributes #1 = { "amdgpu-elide-module-lds" }
author	Jon Chesterfield <jonathanchesterfield@gmail.com>
	Thu, 13 Jul 2023 22:54:37 +0000 (23:54 +0100)
committer	Jon Chesterfield <jonathanchesterfield@gmail.com>
	Thu, 13 Jul 2023 22:54:38 +0000 (23:54 +0100)
llvm/docs/AMDGPUUsage.rst		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/lower-module-lds.ll		patch \| blob \| history