[AMDGPU] gfx940 MUBUF format changes
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Tue, 8 Mar 2022 18:28:23 +0000 (10:28 -0800)
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Fri, 11 Mar 2022 19:36:49 +0000 (11:36 -0800)
Differential Revision: https://reviews.llvm.org/D121234

llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/test/MC/AMDGPU/gfx940_asm_features.s
llvm/test/MC/AMDGPU/gfx940_err.s
llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt

index f968ce2..0b7aebd 100644 (file)
@@ -1175,8 +1175,13 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
 
 let SubtargetPredicate = isGFX90APlus in {
   def BUFFER_WBL2  : MUBUF_Invalidate<"buffer_wbl2"> {
+    let has_glc = 1;
+    let has_sccb = 1;
+    let InOperandList = (ins CPol_0:$cpol);
+    let AsmOperands = "$cpol";
   }
   def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> {
+    let SubtargetPredicate = isGFX90AOnly;
   }
 
   defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>;
@@ -1184,6 +1189,14 @@ let SubtargetPredicate = isGFX90APlus in {
   defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
 } // End SubtargetPredicate = isGFX90APlus
 
+def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
+  let SubtargetPredicate = isGFX940Plus;
+  let has_glc = 1;
+  let has_sccb = 1;
+  let InOperandList = (ins CPol_0:$cpol);
+  let AsmOperands = "$cpol";
+}
+
 let SubtargetPredicate = isGFX10Plus in {
   def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
   def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
@@ -2366,9 +2379,28 @@ class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps,
   let Inst{55}    = acc;
 }
 
+class MUBUF_Real_gfx940 <bits<7> op, MUBUF_Pseudo ps> :
+  MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX940> {
+  let AssemblerPredicate = isGFX940Plus;
+  let DecoderNamespace = "GFX9";
+  let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands);
+
+  let Inst{55} = acc;
+}
+
 multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
   def _vi :     MUBUF_Real_vi<op, ps>;
-  def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>;
+
+  foreach _ = BoolToList<!not(ps.FPAtomic)>.ret in
+    def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
+
+  foreach _ = BoolToList<ps.FPAtomic>.ret in {
+    def _gfx90a : MUBUF_Real_gfx90a<op, ps, 0> {
+      let SubtargetPredicate = isGFX90AOnly;
+      let AssemblerPredicate = isGFX90AOnly;
+    }
+    def _gfx940 : MUBUF_Real_gfx940<op, ps>;
+  }
 }
 
 multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
@@ -2558,9 +2590,17 @@ let SubtargetPredicate = isGFX90APlus in {
 } // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus
 
 def BUFFER_WBL2_gfx90a  : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> {
+  let AsmString = BUFFER_WBL2.Mnemonic; // drop flags
+  let AssemblerPredicate = isGFX90AOnly;
+  let SubtargetPredicate = isGFX90AOnly;
 }
 def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>;
 
+let SubtargetPredicate = isGFX940Plus in {
+def BUFFER_WBL2_gfx940  : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>;
+def BUFFER_INV_gfx940   : MUBUF_Real_gfx940<0x29, BUFFER_INV>;
+}
+
 class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
   MTBUF_Real<ps>,
   Enc64,
index d1ce968..8c8609c 100644 (file)
@@ -1360,7 +1360,9 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
       // to initiate writeback of any dirty cache lines of earlier writes by the
       // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
       // writeback has completed.
-      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2));
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
+        // Set SC bits to indicate system scope.
+        .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
       // Followed by same as GFX7, which will ensure the necessary "S_WAITCNT
       // vmcnt(0)" needed by the "BUFFER_WBL2".
       Changed = true;
index d3f2907..1c7c502 100644 (file)
@@ -149,6 +149,33 @@ v_mov_b64 v[2:3], 1
 // GFX940: v_mov_b64_e32 v[2:3], 0x64              ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00]
 v_mov_b64 v[2:3], 0x64
 
+// GFX90A: error: invalid operand for instruction
+// GFX10:  error: instruction not supported on this GPU
+// GFX940: buffer_wbl2 sc1                         ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00]
+buffer_wbl2 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10:  error: instruction not supported on this GPU
+// GFX940: buffer_wbl2 sc0                         ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00]
+buffer_wbl2 sc0
+
+// GFX90A: error: invalid operand for instruction
+// GFX10:  error: instruction not supported on this GPU
+// GFX940: buffer_wbl2 sc0 sc1                     ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00]
+buffer_wbl2 sc0 sc1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: buffer_inv sc0                          ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00]
+buffer_inv sc0
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: buffer_inv sc1                          ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00]
+buffer_inv sc1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: buffer_inv sc0 sc1                      ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00]
+buffer_inv sc0 sc1
+
 // NOT-GFX940: error: invalid operand for instruction
 // GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
 buffer_atomic_swap v5, off, s[8:11], s3 sc0
@@ -224,3 +251,28 @@ global_atomic_min_f64 v[0:1], v[2:3], off sc1
 // GFX10:  error: instruction not supported on this GPU
 // GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00]
 global_atomic_max_f64 v[0:1], v[2:3], off sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10:  error: instruction not supported on this GPU
+// GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10:  error: instruction not supported on this GPU
+// GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10:  error: instruction not supported on this GPU
+// GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10:  error: instruction not supported on this GPU
+// GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10:  error: instruction not supported on this GPU
+// GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1
index b5ca08d..832754d 100644 (file)
@@ -31,6 +31,9 @@ v_mov_b64 v[2:3], v[4:5] dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWOR
 v_mov_b64_sdwa v[2:3], v[4:5]
 // GFX940: error: sdwa variant of this instruction is not supported
 
+buffer_invl2
+// GFX940: error: instruction not supported on this GPU
+
 global_load_dword v2, v[2:3], off glc
 // GFX940: error: invalid operand for instruction
 
@@ -48,3 +51,9 @@ buffer_atomic_swap v5, off, s[8:11], s3 glc
 
 buffer_atomic_swap v5, off, s[8:11], s3 slc
 // GFX940: error: invalid operand for instruction
+
+buffer_wbl2 glc
+// GFX940: error: invalid operand for instruction
+
+buffer_wbl2 scc
+// GFX940: error: invalid operand for instruction
index c187208..cf88448 100644 (file)
 # GFX940: v_mov_b64_e32 v[2:3], 0x64              ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00]
 0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00
 
+# GFX940: buffer_wbl2 sc1                         ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_wbl2 sc0                         ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_wbl2 sc0 sc1                     ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00]
+0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_inv sc0                          ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_inv sc1                          ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_inv sc0 sc1                      ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00]
+0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00
+
 # GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
 0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03
 
 
 # GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00]
 0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00
+
+# GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03