let SubtargetPredicate = isGFX90APlus in {
def BUFFER_WBL2 : MUBUF_Invalidate<"buffer_wbl2"> {
+ let has_glc = 1; // NOTE(review): presumably makes the real encoding emit the glc bit (written sc0 on GFX940) -- confirm in the MUBUF real base class
+ let has_sccb = 1; // NOTE(review): presumably makes the real encoding emit the scc bit (written sc1 on GFX940) -- confirm
+ let InOperandList = (ins CPol_0:$cpol); // the only input is the cache-policy operand; code generation adds SC0 | SC1 as its immediate
+ let AsmOperands = "$cpol"; // operand part of the asm string is just the cache-policy flags
}
def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> {
+ let SubtargetPredicate = isGFX90AOnly; // overrides the enclosing isGFX90APlus; the GFX940 error test expects buffer_invl2 to be unsupported
}
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>;
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
} // End SubtargetPredicate = isGFX90APlus
+def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { // buffer_inv pseudo; configured with the same fields as BUFFER_WBL2 but gated on GFX940+
+ let SubtargetPredicate = isGFX940Plus;
+ let has_glc = 1; // NOTE(review): presumably enables the glc bit (written sc0 on GFX940) in the encoding -- confirm
+ let has_sccb = 1; // NOTE(review): presumably enables the scc bit (written sc1 on GFX940) in the encoding -- confirm
+ let InOperandList = (ins CPol_0:$cpol); // the only input is the cache-policy operand
+ let AsmOperands = "$cpol"; // operand part of the asm string is just the cache-policy flags
+}
+
let SubtargetPredicate = isGFX10Plus in {
def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
let Inst{55} = acc;
}
+class MUBUF_Real_gfx940 <bits<7> op, MUBUF_Pseudo ps> : // encoded form of MUBUF pseudo `ps` with opcode `op` in the GFX940 encoding family
+ MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX940> {
+ let AssemblerPredicate = isGFX940Plus; // assembler accepts these records only on GFX940+
+ let DecoderNamespace = "GFX9";
+ let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands); // asm string is the pseudo's mnemonic plus its operands with "$tfe" removed
+
+ let Inst{55} = acc; // encoding bit 55 is taken from the acc field
+}
+
multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
def _vi : MUBUF_Real_vi<op, ps>;
- def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>;
+
+ foreach _ = BoolToList<!not(ps.FPAtomic)>.ret in // used as a conditional; assumes BoolToList<c>.ret is non-empty only when c is true -- confirm
+ def _gfx90a : MUBUF_Real_gfx90a<op, ps>; // pseudos without FPAtomic: one _gfx90a record, third template argument left at its default
+
+ foreach _ = BoolToList<ps.FPAtomic>.ret in { // pseudos with FPAtomic: emit separate GFX90A-only and GFX940 records
+ def _gfx90a : MUBUF_Real_gfx90a<op, ps, 0> { // third template argument passed explicitly as 0
+ let SubtargetPredicate = isGFX90AOnly; // keep this record off GFX940, which gets _gfx940 below
+ let AssemblerPredicate = isGFX90AOnly;
+ }
+ def _gfx940 : MUBUF_Real_gfx940<op, ps>; // same opcode, GFX940 encoding family
+ }
}
multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
} // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus
def BUFFER_WBL2_gfx90a : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> {
+ let AsmString = BUFFER_WBL2.Mnemonic; // drop flags: gfx90a syntax is the bare mnemonic, without the pseudo's "$cpol" operand
+ let AssemblerPredicate = isGFX90AOnly; // GFX940 uses a separate record, BUFFER_WBL2_gfx940, for opcode 0x28
+ let SubtargetPredicate = isGFX90AOnly;
}
def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>;
+let SubtargetPredicate = isGFX940Plus in { // GFX940 encoded records for the BUFFER_WBL2 and BUFFER_INV pseudos
+def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>; // same opcode as BUFFER_WBL2_gfx90a
+def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>; // opcode 0x29 is also used by BUFFER_INVL2_gfx90a
+}
+
class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
MTBUF_Real<ps>,
Enc64,
// to initiate writeback of any dirty cache lines of earlier writes by the
// same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
// writeback has completed.
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2));
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
+ // Set SC bits to indicate system scope.
+ .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
// Followed by same as GFX7, which will ensure the necessary "S_WAITCNT
// vmcnt(0)" needed by the "BUFFER_WBL2".
Changed = true;
// GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00]
v_mov_b64 v[2:3], 0x64
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00]
+buffer_wbl2 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00]
+buffer_wbl2 sc0
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00]
+buffer_wbl2 sc0 sc1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00]
+buffer_inv sc0
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00]
+buffer_inv sc1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00]
+buffer_inv sc0 sc1
+
// NOT-GFX940: error: invalid operand for instruction
// GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_swap v5, off, s[8:11], s3 sc0
// GFX10: error: instruction not supported on this GPU
// GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00]
global_atomic_max_f64 v[0:1], v[2:3], off sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1
v_mov_b64_sdwa v[2:3], v[4:5]
// GFX940: error: sdwa variant of this instruction is not supported
+buffer_invl2
+// GFX940: error: instruction not supported on this GPU
+
global_load_dword v2, v[2:3], off glc
// GFX940: error: invalid operand for instruction
buffer_atomic_swap v5, off, s[8:11], s3 slc
// GFX940: error: invalid operand for instruction
+
+buffer_wbl2 glc
+// GFX940: error: invalid operand for instruction
+
+buffer_wbl2 scc
+// GFX940: error: invalid operand for instruction
# GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00]
0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00
+# GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00]
+0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00]
+0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00
+
# GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03
# GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00]
0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00
+
+# GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03