[AMDGPU][MC][GFX940] Correct disassembly of MFMA opcodes
author Dmitry Preobrazhensky <d-pre@mail.ru>
Mon, 1 Aug 2022 12:59:20 +0000 (15:59 +0300)
committer Dmitry Preobrazhensky <d-pre@mail.ru>
Mon, 1 Aug 2022 13:00:47 +0000 (16:00 +0300)
Add a decoder table for GFX940 MFMA opcodes.

Differential Revision: https://reviews.llvm.org/D130759

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt

index 98ee720..3af5d92 100644 (file)
@@ -560,6 +560,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     if (Bytes.size() < 4) break;
     const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
 
+    if (STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts]) {
+      Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address);
+      if (Res)
+        break;
+    }
+
     if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
       Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address);
       if (Res)
index b300e1a..679f6db 100644 (file)
@@ -1004,14 +1004,14 @@ multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(N
                                   VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
                                   VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
   let SubtargetPredicate = isGFX940Plus,
-      AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9",
+      AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940",
       AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
   def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
                     VOP3Pe_MAI <op, PS_ACD.Pfl, 1>;
 
   def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
                     VOP3Pe_MAI <op, PS_VCD.Pfl, 0>;
-  } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9"
+  } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940"
 
   defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;
 
index 048e12a..0742cf1 100644 (file)
 
 # GFX940: v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14]
 0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14
+
+# GFX940: v_mfma_f32_16x16x16_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xcd,0xd3,0x02,0x09,0x1a,0x04]
+0x0a,0x00,0xcd,0xd3,0x02,0x09,0x1a,0x04
+
+# GFX940: v_mfma_f32_16x16x16_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xcd,0xd3,0xfe,0xfd,0xf3,0x0f]
+0xfc,0x00,0xcd,0xd3,0xfe,0xfd,0xf3,0x0f
+
+# GFX940: v_mfma_f32_16x16x16_f16 v[252:255], v[254:255], a[254:255], v[252:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xfc,0x3a,0xcd,0xd3,0xfe,0xfd,0xf3,0x77]
+0xfc,0x3a,0xcd,0xd3,0xfe,0xfd,0xf3,0x77
+
+# GFX940: v_mfma_f32_16x16x16_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xcd,0xd3,0xfe,0xfd,0xf3,0xff]
+0xfc,0xff,0xcd,0xd3,0xfe,0xfd,0xf3,0xff
+
+# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x07]
+0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x07
+
+# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x0f]
+0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x0f
+
+# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc1,0xd3,0x01,0x05,0xc2,0x77]
+0xf0,0x3a,0xc1,0xd3,0x01,0x05,0xc2,0x77
+
+# GFX940: v_mfma_f32_16x16x1_4b_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc1,0xd3,0xff,0xff,0xc3,0xff]
+0xf0,0xff,0xc1,0xd3,0xff,0xff,0xc3,0xff
+
+# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x07]
+0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x07
+
+# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x0f]
+0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x0f
+
+# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc9,0xd3,0x02,0x09,0xc2,0x77]
+0xf0,0x3a,0xc9,0xd3,0x02,0x09,0xc2,0x77
+
+# GFX940: v_mfma_f32_16x16x4_4b_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc9,0xd3,0xfe,0xfd,0xc3,0xff]
+0xf0,0xff,0xc9,0xd3,0xfe,0xfd,0xc3,0xff
+
+# GFX940: v_mfma_f32_16x16x4_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc5,0xd3,0x01,0x05,0x1a,0x04]
+0x0a,0x00,0xc5,0xd3,0x01,0x05,0x1a,0x04
+
+# GFX940: v_mfma_f32_16x16x4_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc5,0xd3,0xff,0xff,0xf3,0x0f]
+0xfc,0x00,0xc5,0xd3,0xff,0xff,0xf3,0x0f
+
+# GFX940: v_mfma_f32_16x16x4_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc5,0xd3,0x01,0x05,0x1a,0x74]
+0x0a,0xba,0xc5,0xd3,0x01,0x05,0x1a,0x74
+
+# GFX940: v_mfma_f32_16x16x4_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc5,0xd3,0xff,0xff,0xf3,0xff]
+0xfc,0xff,0xc5,0xd3,0xff,0xff,0xf3,0xff
+
+# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x07]
+0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x07
+
+# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], a1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x0f]
+0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x0f
+
+# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, a2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc0,0xd3,0x01,0x05,0x82,0x77]
+0xe0,0x3a,0xc0,0xd3,0x01,0x05,0x82,0x77
+
+# GFX940: v_mfma_f32_32x32x1_2b_f32 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc0,0xd3,0xff,0xff,0x83,0xff]
+0xe0,0xff,0xc0,0xd3,0xff,0xff,0x83,0xff
+
+# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x07]
+0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x07
+
+# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x0f]
+0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x0f
+
+# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc4,0xd3,0x01,0x05,0xc2,0x77]
+0xf0,0x3a,0xc4,0xd3,0x01,0x05,0xc2,0x77
+
+# GFX940: v_mfma_f32_32x32x2_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc4,0xd3,0xff,0xff,0xc3,0xff]
+0xf0,0xff,0xc4,0xd3,0xff,0xff,0xc3,0xff
+
+# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x07]
+0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x07
+
+# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], a[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x0f]
+0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x0f
+
+# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], a[4:5], v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc8,0xd3,0x02,0x09,0x82,0x77]
+0xe0,0x3a,0xc8,0xd3,0x02,0x09,0x82,0x77
+
+# GFX940: v_mfma_f32_32x32x4_2b_f16 a[224:255], a[254:255], a[254:255], a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc8,0xd3,0xfe,0xfd,0x83,0xff]
+0xe0,0xff,0xc8,0xd3,0xfe,0xfd,0x83,0xff
+
+# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x07]
+0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x07
+
+# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x0f]
+0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x0f
+
+# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xcc,0xd3,0x02,0x09,0xc2,0x77]
+0xf0,0x3a,0xcc,0xd3,0x02,0x09,0xc2,0x77
+
+# GFX940: v_mfma_f32_32x32x8_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xcc,0xd3,0xfe,0xfd,0xc3,0xff]
+0xf0,0xff,0xcc,0xd3,0xfe,0xfd,0xc3,0xff
+
+# GFX940: v_mfma_f32_4x4x1_16b_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc2,0xd3,0x01,0x05,0x1a,0x04]
+0x0a,0x00,0xc2,0xd3,0x01,0x05,0x1a,0x04
+
+# GFX940: v_mfma_f32_4x4x1_16b_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc2,0xd3,0xff,0xff,0xf3,0x0f]
+0xfc,0x00,0xc2,0xd3,0xff,0xff,0xf3,0x0f
+
+# GFX940: v_mfma_f32_4x4x1_16b_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc2,0xd3,0x01,0x05,0x1a,0x74]
+0x0a,0xba,0xc2,0xd3,0x01,0x05,0x1a,0x74
+
+# GFX940: v_mfma_f32_4x4x1_16b_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc2,0xd3,0xff,0xff,0xf3,0xff]
+0xfc,0xff,0xc2,0xd3,0xff,0xff,0xf3,0xff
+
+# GFX940: v_mfma_f32_4x4x4_16b_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xca,0xd3,0x02,0x09,0x1a,0x04]
+0x0a,0x00,0xca,0xd3,0x02,0x09,0x1a,0x04
+
+# GFX940: v_mfma_f32_4x4x4_16b_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xca,0xd3,0xfe,0xfd,0xf3,0x0f]
+0xfc,0x00,0xca,0xd3,0xfe,0xfd,0xf3,0x0f
+
+# GFX940: v_mfma_f32_4x4x4_16b_f16 a[10:13], v[2:3], a[4:5], a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xca,0xd3,0x02,0x09,0x1a,0x74]
+0x0a,0xba,0xca,0xd3,0x02,0x09,0x1a,0x74
+
+# GFX940: v_mfma_f32_4x4x4_16b_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xca,0xd3,0xfe,0xfd,0xf3,0xff]
+0xfc,0xff,0xca,0xd3,0xfe,0xfd,0xf3,0xff
+
+# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x1f]
+0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x1f
+
+# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], v1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x17]
+0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x17
+
+# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, v2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xd1,0xd3,0x01,0x05,0xc2,0x6f]
+0xf0,0x3a,0xd1,0xd3,0x01,0x05,0xc2,0x6f
+
+# GFX940: v_mfma_i32_16x16x4_4b_i8 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xd1,0xd3,0xff,0xff,0xc3,0xff]
+0xf0,0xff,0xd1,0xd3,0xff,0xff,0xc3,0xff
+
+# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x07]
+0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x07
+
+# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, a2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x17]
+0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x17
+
+# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], a1, v2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xd0,0xd3,0x01,0x05,0x82,0x6f]
+0xe0,0x3a,0xd0,0xd3,0x01,0x05,0x82,0x6f
+
+# GFX940: v_mfma_i32_32x32x4_2b_i8 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xd0,0xd3,0xff,0xff,0x83,0xff]
+0xe0,0xff,0xd0,0xd3,0xff,0xff,0x83,0xff
+
+# GFX940: v_mfma_i32_4x4x4_16b_i8 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xd2,0xd3,0x01,0x05,0x1a,0x04]
+0x0a,0x00,0xd2,0xd3,0x01,0x05,0x1a,0x04
+
+# GFX940: v_mfma_i32_4x4x4_16b_i8 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xd2,0xd3,0xff,0xff,0xf3,0x0f]
+0xfc,0x00,0xd2,0xd3,0xff,0xff,0xf3,0x0f
+
+# GFX940: v_mfma_i32_4x4x4_16b_i8 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xd2,0xd3,0x01,0x05,0x1a,0x74]
+0x0a,0xba,0xd2,0xd3,0x01,0x05,0x1a,0x74
+
+# GFX940: v_mfma_i32_4x4x4_16b_i8 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xd2,0xd3,0xff,0xff,0xf3,0xff]
+0xfc,0xff,0xd2,0xd3,0xff,0xff,0xf3,0xff