[AMDGPU][GFX9][GFX10] Support base+soffset+offset SMEM atomics.
author Ivan Kosarev <ivan.kosarev@amd.com>
Fri, 10 Jun 2022 09:06:45 +0000 (10:06 +0100)
committer Ivan Kosarev <ivan.kosarev@amd.com>
Fri, 10 Jun 2022 12:22:41 +0000 (13:22 +0100)
Resolves a part of
https://github.com/llvm/llvm-project/issues/38652

Reviewed By: dp

Differential Revision: https://reviews.llvm.org/D127314

llvm/lib/Target/AMDGPU/SMInstructions.td
llvm/test/MC/AMDGPU/gfx10_asm_smem.s
llvm/test/MC/AMDGPU/gfx9_asm_smem.s
llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt

index 530835bc22c8bcaee70b494e34c8eb50b8a39df1..882d13402a192ffddda7fc7f8564916556be835f 100644 (file)
@@ -87,6 +87,21 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
   bits<5> cpol;
 }
 
+class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
+                 dag ins, string asm> {
+  bit HasOffset = hasOffset;
+  bit HasSOffset = hasSOffset;
+  string Variant = variant;
+  dag Ins = ins;
+  string Asm = asm;
+}
+
+def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">;
+def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
+def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM",
+                                 (ins SReg_32:$soffset, smem_offset_mod:$offset),
+                                 "$soffset$offset">;
+
 class SM_Probe_Pseudo <string opName, string variant, RegisterClass baseClass,
                        dag offsets, string asmOffsets,
                        bit hasOffset, bit hasSOffset>
@@ -282,23 +297,21 @@ class SM_Atomic_Pseudo <string opName,
 class SM_Pseudo_Atomic<string opName,
                        RegisterClass baseClass,
                        RegisterClass dataClass,
-                       bit isImm,
+                       OffsetMode offsets,
                        bit isRet,
-                       string opNameWithSuffix = opName # !if(isImm,
-                                 !if(isRet, "_IMM_RTN", "_IMM"),
-                                 !if(isRet, "_SGPR_RTN", "_SGPR")),
+                       string opNameWithSuffix =
+                         opName # offsets.Variant # !if(isRet, "_RTN", ""),
                        Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
   SM_Atomic_Pseudo<opName,
                    !if(isRet, (outs dataClass:$sdst), (outs)),
-                   !if(isImm,
-                       (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol),
-                       (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPolTy:$cpol)),
-                   !if(isRet, " $sdst", " $sdata") # ", $sbase, " #
-                     !if(isImm, "$offset", "$soffset") # "$cpol",
+                   !con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
+                        (ins CPolTy:$cpol)),
+                   !if(isRet, " $sdst", " $sdata") #
+                     ", $sbase, " # offsets.Asm # "$cpol",
                    isRet>,
   AtomicNoRet <opNameWithSuffix, isRet> {
-  let has_offset = isImm;
-  let has_soffset = !not(isImm);
+  let has_offset = offsets.HasOffset;
+  let has_soffset = offsets.HasSOffset;
   let PseudoInstr = opNameWithSuffix;
 
   let Constraints = !if(isRet, "$sdst = $sdata", "");
@@ -308,10 +321,12 @@ class SM_Pseudo_Atomic<string opName,
 multiclass SM_Pseudo_Atomics<string opName,
                              RegisterClass baseClass,
                              RegisterClass dataClass> {
-  def _IMM      : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 0>;
-  def _SGPR     : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 0>;
-  def _IMM_RTN  : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 1>;
-  def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 1>;
+  def _IMM      : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
+  def _SGPR     : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
+  def _SGPR_IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
+  def _IMM_RTN  : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 1>;
+  def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 1>;
+  def _SGPR_IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -705,8 +720,20 @@ class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
 multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
   def _IMM_vi       : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
   def _SGPR_vi      : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+  def _SGPR_alt_gfx9
+    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>,
+      SMEM_Real_SGPR_alt_gfx9;
+  let IsGFX9SpecificEncoding = true in
+  def _SGPR_IMM_gfx9
+    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
   def _IMM_RTN_vi   : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
   def _SGPR_RTN_vi  : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+  def _SGPR_RTN_alt_gfx9
+    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>,
+      SMEM_Real_SGPR_alt_gfx9;
+  let IsGFX9SpecificEncoding = true in
+  def _SGPR_IMM_RTN_gfx9
+    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
 }
 
 defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
@@ -1103,8 +1130,10 @@ class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
 multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
   def _IMM_gfx10       : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
   def _SGPR_gfx10      : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+  def _SGPR_IMM_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
   def _IMM_RTN_gfx10   : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
   def _SGPR_RTN_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+  def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
 }
 
 let SubtargetPredicate = HasScalarAtomics in {
index ef20f198a66db0daf22f36a50abc32082ea5fdf2..ef4b460f67ab6304bc0269c8c6e50c4cc179fd90 100644 (file)
@@ -983,6 +983,9 @@ s_atomic_add s5, s[2:3], s101
 s_atomic_add s5, s[2:3], 0x64
 // GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa]
 
+s_atomic_add s5, s[2:3], s7 offset:0x64
+// GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e]
+
 s_atomic_add_x2 s[10:11], s[2:3], s101
 // GFX10: encoding: [0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca]
 
@@ -1109,6 +1112,9 @@ s_atomic_add s5, s[2:3], s101 glc
 s_atomic_add s5, s[2:3], 0x64 glc
 // GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa]
 
+s_atomic_add s5, s[2:3], s7 offset:0x64 glc
+// GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e]
+
 s_atomic_add_x2 s[10:11], s[2:3], s101 glc
 // GFX10: encoding: [0x81,0x02,0x89,0xf6,0x00,0x00,0x00,0xca]
 
index 371fe084fdcba0fa751f04217d9d87bd92620ad8..1fe4ec3f4cc6f878909a701dd58c91921c8129a5 100644 (file)
@@ -3123,9 +3123,15 @@ s_atomic_add s5, s[2:3], m0
 s_atomic_add s5, s[2:3], 0x0
 // CHECK: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00]
 
+s_atomic_add s5, s[2:3], s7 offset:0x12345
+// CHECK: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e]
+
 s_atomic_add s5, s[2:3], s0 glc
 // CHECK: [0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00]
 
+s_atomic_add s5, s[2:3], s7 offset:0x12345 glc
+// CHECK: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e]
+
 s_atomic_sub s5, s[2:3], s0
 // CHECK: [0x41,0x01,0x0c,0xc2,0x00,0x00,0x00,0x00]
 
index 4d810c699c7cd31e9db6b3dde154f6ed79ad56d3..460a8c2326ce3c9ae4a6c0a3e7f8f8c1ce56d1c0 100644 (file)
 # GFX10: s_atomic_add s5, s[2:3], 0x64           ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa]
 0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa
 
+# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e]
+0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e
+
 # GFX10: s_atomic_add s5, s[2:3], 0x64 dlc       ; encoding: [0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa]
 0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa
 
 # GFX10: s_atomic_add s5, s[2:3], 0x64 glc       ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa]
 0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa
 
+# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 glc ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e]
+0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e
+
 # GFX10: s_atomic_add s5, s[2:3], s101           ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca]
 0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca
 
index b561bf15af0ce1f3f31ceb01bbe6044d3a3e8d91..c2573060a1e9d6d877ae4d512fb7237fc3d59cc9 100644 (file)
 # GFX9: s_atomic_add s5, s[2:3], s101    ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00]
 0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00
 
+# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1
+# and the offset register encoded in the soffset field with the offset
+# field being disregarded.
+# GFX9: s_atomic_add s5, s[2:3], s101    ; encoding: [0x41,0x41,0x08,0xc2,0x00,0x00,0x00,0xca]
+0x41,0x41,0x08,0xc2,0x2e,0x00,0x00,0xca
+
+# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1
+# and the offset register encoded in the soffset field with the offset
+# field being disregarded.
+# GFX9: s_atomic_add s5, s[2:3], s101 glc ; encoding: [0x41,0x41,0x09,0xc2,0x00,0x00,0x00,0xca]
+0x41,0x41,0x09,0xc2,0x2e,0x00,0x00,0xca
+
 # GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101    ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00]
 0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00
 
+# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 ; encoding: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e]
+0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e
+
+# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 glc ; encoding: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e]
+0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e
+
 # GFX9: s_atomic_and s101, s[2:3], s0    ; encoding: [0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00]
 0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00