[AMDGPU][GlobalISel] Add support for global atomicrmw fadd

author Jay Foad <jay.foad@amd.com>
Mon, 1 Mar 2021 17:24:57 +0000 (17:24 +0000)
committer Jay Foad <jay.foad@amd.com>
Wed, 31 Mar 2021 10:13:00 +0000 (11:13 +0100)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

index e2e0321..d80e6c5 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1716,7 +1716,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    case Intrinsic::amdgcn_s_barrier:
      return selectSBarrier(I);
    case Intrinsic::amdgcn_global_atomic_fadd:
-    return selectGlobalAtomicFaddIntrinsic(I);
+    return selectGlobalAtomicFadd(I, I.getOperand(2), I.getOperand(3));
    default: {
      return selectImpl(I, *CoverageInfo);
    }
@@ -2319,6 +2319,13 @@ void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  
  bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
    MachineInstr &I) const {
+  if (I.getOpcode() == TargetOpcode::G_ATOMICRMW_FADD) {
+    const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
+    unsigned AS = PtrTy.getAddressSpace();
+    if (AS == AMDGPUAS::GLOBAL_ADDRESS)
+      return selectGlobalAtomicFadd(I, I.getOperand(1), I.getOperand(2));
+  }
+
    initM0(I);
    return selectImpl(I, *CoverageInfo);
  }
@@ -2960,11 +2967,14 @@ bool AMDGPUInstructionSelector::selectAMDGPU_BUFFER_ATOMIC_FADD(
    return true;
  }
  
-bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
-  MachineInstr &MI) const{
+bool AMDGPUInstructionSelector::selectGlobalAtomicFadd(
+  MachineInstr &MI, MachineOperand &AddrOp, MachineOperand &DataOp) const {
  
-  if (STI.hasGFX90AInsts())
+  if (STI.hasGFX90AInsts()) {
+    // gfx90a adds return versions of the global atomic fadd instructions so no
+    // special handling is required.
      return selectImpl(MI, *CoverageInfo);
+  }
  
    MachineBasicBlock *MBB = MI.getParent();
    const DebugLoc &DL = MI.getDebugLoc();
@@ -2981,9 +2991,9 @@ bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
    // FIXME: This is only needed because tablegen requires number of dst operands
    // in match and replace pattern to be the same. Otherwise patterns can be
    // exported from SDag path.
-  auto Addr = selectFlatOffsetImpl<true>(MI.getOperand(2));
+  auto Addr = selectFlatOffsetImpl<true>(AddrOp);
  
-  Register Data = MI.getOperand(3).getReg();
+  Register Data = DataOp.getReg();
    const unsigned Opc = MRI->getType(Data).isVector() ?
      AMDGPU::GLOBAL_ATOMIC_PK_ADD_F16 : AMDGPU::GLOBAL_ATOMIC_ADD_F32;
    auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

index 878e3a9..1022009 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -147,7 +147,8 @@ private:
    bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
    bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const;
    bool selectAMDGPU_BUFFER_ATOMIC_FADD(MachineInstr &I) const;
-  bool selectGlobalAtomicFaddIntrinsic(MachineInstr &I) const;
+  bool selectGlobalAtomicFadd(MachineInstr &I, MachineOperand &AddrOp,
+                              MachineOperand &DataOp) const;
    bool selectBVHIntrinsic(MachineInstr &I) const;
  
    std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

index 5c8da8b..d86f277 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1291,12 +1291,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
      Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
    }
  
+  auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD);
    if (ST.hasLDSFPAtomics()) {
-    auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
-      .legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
+    Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
      if (ST.hasGFX90AInsts())
        Atomic.legalFor({{S64, LocalPtr}});
    }
+  if (ST.hasAtomicFaddInsts())
+    Atomic.legalFor({{S32, GlobalPtr}});
  
    // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
    // demarshalling
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir

new file mode 100644 (file)

index 0000000..0abf6f6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
@@ -0,0 +1,22 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -O0 -run-pass=legalizer %s -o - | FileCheck %s
+
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p1), %1:_ :: (load store seq_cst 4, addrspace 1) (in function: atomicrmw_fadd_global_i32)
+
+---
+name: atomicrmw_fadd_global_i32
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2
+    ; CHECK-LABEL: name: atomicrmw_fadd_global_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
+    %0:_(p1) = COPY $sgpr0_sgpr1
+    %1:_(s32) = COPY $sgpr2
+    %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 4, addrspace 1)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir

similarity index 100%

rename from llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir

rename to llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir
author	Jay Foad <jay.foad@amd.com>
	Mon, 1 Mar 2021 17:24:57 +0000 (17:24 +0000)
committer	Jay Foad <jay.foad@amd.com>
	Wed, 31 Mar 2021 10:13:00 +0000 (11:13 +0100)
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp		patch | blob | history
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h		patch | blob | history
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp		patch | blob | history
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir	[new file with mode: 0644]	patch | blob
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir	[moved from llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir with 100% similarity]	patch | blob | history