[AMDGPU][MC][GFX11] Disable non-null src0 for s_waitcnt_*cnt
author Dmitry Preobrazhensky <dmitri.preobrazhenski@gmail.com>
Thu, 29 Sep 2022 16:54:00 +0000 (19:54 +0300)
committer Dmitry Preobrazhensky <dmitri.preobrazhenski@gmail.com>
Thu, 29 Sep 2022 16:56:03 +0000 (19:56 +0300)
Differential Revision: https://reviews.llvm.org/D134809

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/SOPInstructions.td
llvm/test/MC/AMDGPU/gfx11_asm_sopk.s
llvm/test/MC/AMDGPU/gfx11_asm_sopk_err.s [new file with mode: 0644]
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sopk.txt

index 7e4b639..3741053 100644 (file)
@@ -1685,6 +1685,7 @@ private:
   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
   bool validateDivScale(const MCInst &Inst);
+  bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                              const SMLoc &IDLoc);
   bool validateLdsDMA(uint64_t Enc, const MCInst &Inst,
@@ -4454,6 +4455,29 @@ bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
   return false;
 }
 
+bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
+                                      const OperandVector &Operands) {
+  if (!isGFX11Plus())
+    return true;
+
+  unsigned Opc = Inst.getOpcode();
+  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
+      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
+      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
+      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
+    return true;
+
+  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
+  assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
+  auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
+  if (Reg == AMDGPU::SGPR_NULL)
+    return true;
+
+  SMLoc RegLoc = getRegLoc(Reg, Operands);
+  Error(RegLoc, "src0 must be null");
+  return false;
+}
+
 // gfx90a has an undocumented limitation:
 // DS_GWS opcodes must use even aligned registers.
 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
@@ -4694,6 +4718,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
     Error(IDLoc, "ABS not allowed in VOP3B instructions");
     return false;
   }
+  if (!validateWaitCnt(Inst, Operands)) {
+    return false;
+  }
   if (!validateExeczVcczOperands(Operands)) {
     return false;
   }
index 9dec1fe..fdfb2be 100644 (file)
@@ -704,7 +704,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
 
   int ImmLitIdx =
       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
-  if (Res && ImmLitIdx != -1)
+  bool isVOP2 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP2;
+  if (Res && ImmLitIdx != -1 && (isVOP2 || AMDGPU::isVOPD(MI.getOpcode())))
     Res = convertFMAanyK(MI, ImmLitIdx);
 
   // if the opcode was not recognized we'll assume a Size of 4 bytes
index 09bdf02..21b8ac9 100644 (file)
@@ -738,6 +738,7 @@ class SOPK_Real<SOPK_Pseudo ps> :
   let SOPK = 1;
   let isPseudo = 0;
   let isCodeGenOnly = 0;
+  let UseNamedOperandTable = 1;
 
   // copy relevant pseudo op flags
   let SubtargetPredicate = ps.SubtargetPredicate;
index 87c1da6..22adf37 100644 (file)
@@ -465,101 +465,29 @@ s_call_b64 vcc, 0x1234
 s_call_b64 null, 0x1234
 // GFX11: encoding: [0x34,0x12,0x7c,0xba]
 
-s_waitcnt_vscnt s0, 0x1234
-// GFX11: encoding: [0x34,0x12,0x00,0xbc]
+s_waitcnt_vscnt null, 0x1234
+// GFX11: encoding: [0x34,0x12,0x7c,0xbc]
 
-s_waitcnt_vscnt s0, 0xc1d1
-// GFX11: encoding: [0xd1,0xc1,0x00,0xbc]
+s_waitcnt_vscnt null, 0xc1d1
+// GFX11: encoding: [0xd1,0xc1,0x7c,0xbc]
 
-s_waitcnt_vscnt s105, 0x1234
-// GFX11: encoding: [0x34,0x12,0x69,0xbc]
+s_waitcnt_vmcnt null, 0x1234
+// GFX11: encoding: [0x34,0x12,0xfc,0xbc]
 
-s_waitcnt_vscnt exec_lo, 0x1234
-// GFX11: encoding: [0x34,0x12,0x7e,0xbc]
+s_waitcnt_vmcnt null, 0xc1d1
+// GFX11: encoding: [0xd1,0xc1,0xfc,0xbc]
 
-s_waitcnt_vscnt exec_hi, 0x1234
-// GFX11: encoding: [0x34,0x12,0x7f,0xbc]
+s_waitcnt_expcnt null, 0x1234
+// GFX11: encoding: [0x34,0x12,0x7c,0xbd]
 
-s_waitcnt_vscnt vcc_lo, 0x1234
-// GFX11: encoding: [0x34,0x12,0x6a,0xbc]
+s_waitcnt_expcnt null, 0xc1d1
+// GFX11: encoding: [0xd1,0xc1,0x7c,0xbd]
 
-s_waitcnt_vscnt vcc_hi, 0x1234
-// GFX11: encoding: [0x34,0x12,0x6b,0xbc]
+s_waitcnt_lgkmcnt null, 0x1234
+// GFX11: encoding: [0x34,0x12,0xfc,0xbd]
 
-s_waitcnt_vscnt m0, 0x1234
-// GFX11: encoding: [0x34,0x12,0x7d,0xbc]
-
-s_waitcnt_vmcnt s0, 0x1234
-// GFX11: encoding: [0x34,0x12,0x80,0xbc]
-
-s_waitcnt_vmcnt s0, 0xc1d1
-// GFX11: encoding: [0xd1,0xc1,0x80,0xbc]
-
-s_waitcnt_vmcnt s105, 0x1234
-// GFX11: encoding: [0x34,0x12,0xe9,0xbc]
-
-s_waitcnt_vmcnt exec_lo, 0x1234
-// GFX11: encoding: [0x34,0x12,0xfe,0xbc]
-
-s_waitcnt_vmcnt exec_hi, 0x1234
-// GFX11: encoding: [0x34,0x12,0xff,0xbc]
-
-s_waitcnt_vmcnt vcc_lo, 0x1234
-// GFX11: encoding: [0x34,0x12,0xea,0xbc]
-
-s_waitcnt_vmcnt vcc_hi, 0x1234
-// GFX11: encoding: [0x34,0x12,0xeb,0xbc]
-
-s_waitcnt_vmcnt m0, 0x1234
-// GFX11: encoding: [0x34,0x12,0xfd,0xbc]
-
-s_waitcnt_expcnt s0, 0x1234
-// GFX11: encoding: [0x34,0x12,0x00,0xbd]
-
-s_waitcnt_expcnt s0, 0xc1d1
-// GFX11: encoding: [0xd1,0xc1,0x00,0xbd]
-
-s_waitcnt_expcnt s105, 0x1234
-// GFX11: encoding: [0x34,0x12,0x69,0xbd]
-
-s_waitcnt_expcnt exec_lo, 0x1234
-// GFX11: encoding: [0x34,0x12,0x7e,0xbd]
-
-s_waitcnt_expcnt exec_hi, 0x1234
-// GFX11: encoding: [0x34,0x12,0x7f,0xbd]
-
-s_waitcnt_expcnt vcc_lo, 0x1234
-// GFX11: encoding: [0x34,0x12,0x6a,0xbd]
-
-s_waitcnt_expcnt vcc_hi, 0x1234
-// GFX11: encoding: [0x34,0x12,0x6b,0xbd]
-
-s_waitcnt_expcnt m0, 0x1234
-// GFX11: encoding: [0x34,0x12,0x7d,0xbd]
-
-s_waitcnt_lgkmcnt s0, 0x1234
-// GFX11: encoding: [0x34,0x12,0x80,0xbd]
-
-s_waitcnt_lgkmcnt s0, 0xc1d1
-// GFX11: encoding: [0xd1,0xc1,0x80,0xbd]
-
-s_waitcnt_lgkmcnt s105, 0x1234
-// GFX11: encoding: [0x34,0x12,0xe9,0xbd]
-
-s_waitcnt_lgkmcnt exec_lo, 0x1234
-// GFX11: encoding: [0x34,0x12,0xfe,0xbd]
-
-s_waitcnt_lgkmcnt exec_hi, 0x1234
-// GFX11: encoding: [0x34,0x12,0xff,0xbd]
-
-s_waitcnt_lgkmcnt vcc_lo, 0x1234
-// GFX11: encoding: [0x34,0x12,0xea,0xbd]
-
-s_waitcnt_lgkmcnt vcc_hi, 0x1234
-// GFX11: encoding: [0x34,0x12,0xeb,0xbd]
-
-s_waitcnt_lgkmcnt m0, 0x1234
-// GFX11: encoding: [0x34,0x12,0xfd,0xbd]
+s_waitcnt_lgkmcnt null, 0xc1d1
+// GFX11: encoding: [0xd1,0xc1,0xfc,0xbd]
 
 s_subvector_loop_begin s0, 0x1234
 // GFX11: encoding: [0x34,0x12,0x00,0xbb]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_sopk_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_sopk_err.s
new file mode 100644 (file)
index 0000000..4537134
--- /dev/null
@@ -0,0 +1,21 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck %s -check-prefix=GFX11 --implicit-check-not=error: --strict-whitespace
+
+s_waitcnt_vscnt s0, 0x1234
+// GFX11: error: src0 must be null
+// GFX11-NEXT:{{^}}s_waitcnt_vscnt s0, 0x1234
+// GFX11-NEXT:{{^}}                ^
+
+s_waitcnt_vmcnt exec_lo, 0x1234
+// GFX11: error: src0 must be null
+// GFX11-NEXT:{{^}}s_waitcnt_vmcnt exec_lo, 0x1234
+// GFX11-NEXT:{{^}}                ^
+
+s_waitcnt_expcnt vcc_lo, 0x1234
+// GFX11: error: src0 must be null
+// GFX11-NEXT:{{^}}s_waitcnt_expcnt vcc_lo, 0x1234
+// GFX11-NEXT:{{^}}                 ^
+
+s_waitcnt_lgkmcnt m0, 0x1234
+// GFX11: error: src0 must be null
+// GFX11-NEXT:{{^}}s_waitcnt_lgkmcnt m0, 0x1234
+// GFX11-NEXT:{{^}}                  ^
index b02d582..f342d7c 100644 (file)
 # GFX11: s_version 0xc1d1                        ; encoding: [0xd1,0xc1,0x80,0xb0]
 0xd1,0xc1,0x80,0xb0
 
-# GFX11: s_waitcnt_expcnt exec_hi, 0x1234        ; encoding: [0x34,0x12,0x7f,0xbd]
-0x34,0x12,0x7f,0xbd
+# GFX11: s_waitcnt_expcnt null, 0x1234           ; encoding: [0x34,0x12,0x7c,0xbd]
+0x34,0x12,0x7c,0xbd
 
-# GFX11: s_waitcnt_expcnt exec_lo, 0x1234        ; encoding: [0x34,0x12,0x7e,0xbd]
-0x34,0x12,0x7e,0xbd
-
-# GFX11: s_waitcnt_expcnt m0, 0x1234             ; encoding: [0x34,0x12,0x7d,0xbd]
-0x34,0x12,0x7d,0xbd
-
-# GFX11: s_waitcnt_expcnt s0, 0x1234             ; encoding: [0x34,0x12,0x00,0xbd]
-0x34,0x12,0x00,0xbd
-
-# GFX11: s_waitcnt_expcnt s0, 0xc1d1             ; encoding: [0xd1,0xc1,0x00,0xbd]
-0xd1,0xc1,0x00,0xbd
+# GFX11: s_waitcnt_expcnt null, 0xc1d1           ; encoding: [0xd1,0xc1,0x7c,0xbd]
+0xd1,0xc1,0x7c,0xbd
 
 # GFX11: s_waitcnt_expcnt s105, 0x1234           ; encoding: [0x34,0x12,0x69,0xbd]
 0x34,0x12,0x69,0xbd
 
-# GFX11: s_waitcnt_expcnt vcc_hi, 0x1234         ; encoding: [0x34,0x12,0x6b,0xbd]
-0x34,0x12,0x6b,0xbd
-
-# GFX11: s_waitcnt_expcnt vcc_lo, 0x1234         ; encoding: [0x34,0x12,0x6a,0xbd]
-0x34,0x12,0x6a,0xbd
-
-# GFX11: s_waitcnt_lgkmcnt exec_hi, 0x1234       ; encoding: [0x34,0x12,0xff,0xbd]
-0x34,0x12,0xff,0xbd
-
-# GFX11: s_waitcnt_lgkmcnt exec_lo, 0x1234       ; encoding: [0x34,0x12,0xfe,0xbd]
-0x34,0x12,0xfe,0xbd
+# GFX11: s_waitcnt_lgkmcnt null, 0x1234          ; encoding: [0x34,0x12,0xfc,0xbd]
+0x34,0x12,0xfc,0xbd
 
-# GFX11: s_waitcnt_lgkmcnt m0, 0x1234            ; encoding: [0x34,0x12,0xfd,0xbd]
-0x34,0x12,0xfd,0xbd
-
-# GFX11: s_waitcnt_lgkmcnt s0, 0x1234            ; encoding: [0x34,0x12,0x80,0xbd]
-0x34,0x12,0x80,0xbd
-
-# GFX11: s_waitcnt_lgkmcnt s0, 0xc1d1            ; encoding: [0xd1,0xc1,0x80,0xbd]
-0xd1,0xc1,0x80,0xbd
-
-# GFX11: s_waitcnt_lgkmcnt s105, 0x1234          ; encoding: [0x34,0x12,0xe9,0xbd]
-0x34,0x12,0xe9,0xbd
+# GFX11: s_waitcnt_lgkmcnt null, 0xc1d1          ; encoding: [0xd1,0xc1,0xfc,0xbd]
+0xd1,0xc1,0xfc,0xbd
 
 # GFX11: s_waitcnt_lgkmcnt vcc_hi, 0x1234        ; encoding: [0x34,0x12,0xeb,0xbd]
 0x34,0x12,0xeb,0xbd
 
-# GFX11: s_waitcnt_lgkmcnt vcc_lo, 0x1234        ; encoding: [0x34,0x12,0xea,0xbd]
-0x34,0x12,0xea,0xbd
+# GFX11: s_waitcnt_vmcnt null, 0x1234            ; encoding: [0x34,0x12,0xfc,0xbc]
+0x34,0x12,0xfc,0xbc
+
+# GFX11: s_waitcnt_vmcnt null, 0xc1d1            ; encoding: [0xd1,0xc1,0xfc,0xbc]
+0xd1,0xc1,0xfc,0xbc
 
 # GFX11: s_waitcnt_vmcnt exec_hi, 0x1234         ; encoding: [0x34,0x12,0xff,0xbc]
 0x34,0x12,0xff,0xbc
 
-# GFX11: s_waitcnt_vmcnt exec_lo, 0x1234         ; encoding: [0x34,0x12,0xfe,0xbc]
-0x34,0x12,0xfe,0xbc
-
-# GFX11: s_waitcnt_vmcnt m0, 0x1234              ; encoding: [0x34,0x12,0xfd,0xbc]
-0x34,0x12,0xfd,0xbc
+# GFX11: s_waitcnt_vscnt null, 0x1234            ; encoding: [0x34,0x12,0x7c,0xbc]
+0x34,0x12,0x7c,0xbc
 
-# GFX11: s_waitcnt_vmcnt s0, 0x1234              ; encoding: [0x34,0x12,0x80,0xbc]
-0x34,0x12,0x80,0xbc
-
-# GFX11: s_waitcnt_vmcnt s0, 0xc1d1              ; encoding: [0xd1,0xc1,0x80,0xbc]
-0xd1,0xc1,0x80,0xbc
-
-# GFX11: s_waitcnt_vmcnt s105, 0x1234            ; encoding: [0x34,0x12,0xe9,0xbc]
-0x34,0x12,0xe9,0xbc
-
-# GFX11: s_waitcnt_vmcnt vcc_hi, 0x1234          ; encoding: [0x34,0x12,0xeb,0xbc]
-0x34,0x12,0xeb,0xbc
-
-# GFX11: s_waitcnt_vmcnt vcc_lo, 0x1234          ; encoding: [0x34,0x12,0xea,0xbc]
-0x34,0x12,0xea,0xbc
-
-# GFX11: s_waitcnt_vscnt exec_hi, 0x1234         ; encoding: [0x34,0x12,0x7f,0xbc]
-0x34,0x12,0x7f,0xbc
-
-# GFX11: s_waitcnt_vscnt exec_lo, 0x1234         ; encoding: [0x34,0x12,0x7e,0xbc]
-0x34,0x12,0x7e,0xbc
+# GFX11: s_waitcnt_vscnt null, 0xc1d1            ; encoding: [0xd1,0xc1,0x7c,0xbc]
+0xd1,0xc1,0x7c,0xbc
 
 # GFX11: s_waitcnt_vscnt m0, 0x1234              ; encoding: [0x34,0x12,0x7d,0xbc]
 0x34,0x12,0x7d,0xbc
 
-# GFX11: s_waitcnt_vscnt s0, 0x1234              ; encoding: [0x34,0x12,0x00,0xbc]
-0x34,0x12,0x00,0xbc
-
-# GFX11: s_waitcnt_vscnt s0, 0xc1d1              ; encoding: [0xd1,0xc1,0x00,0xbc]
-0xd1,0xc1,0x00,0xbc
-
-# GFX11: s_waitcnt_vscnt s105, 0x1234            ; encoding: [0x34,0x12,0x69,0xbc]
-0x34,0x12,0x69,0xbc
-
-# GFX11: s_waitcnt_vscnt vcc_hi, 0x1234          ; encoding: [0x34,0x12,0x6b,0xbc]
-0x34,0x12,0x6b,0xbc
-
-# GFX11: s_waitcnt_vscnt vcc_lo, 0x1234          ; encoding: [0x34,0x12,0x6a,0xbc]
-0x34,0x12,0x6a,0xbc
-
 # GFX11: s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0xaf123456 ; encoding: [0x01,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf]
 0x01,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf