[AMDGPU][MC] Correct image_gather4h
author Dmitry Preobrazhensky <dmitri.preobrazhenski@gmail.com>
Tue, 11 Oct 2022 11:40:23 +0000 (14:40 +0300)
committer Dmitry Preobrazhensky <dmitri.preobrazhenski@gmail.com>
Tue, 11 Oct 2022 11:41:27 +0000 (14:41 +0300)
Correct encoding of image_gather4h for GFX9; disable this instruction for SI, CI and VI.

Differential Revision: https://reviews.llvm.org/D135605

llvm/lib/Target/AMDGPU/MIMGInstructions.td
llvm/test/MC/AMDGPU/gfx7_unsupported.s
llvm/test/MC/AMDGPU/gfx8_unsupported.s
llvm/test/MC/AMDGPU/gfx90a_err.s
llvm/test/MC/AMDGPU/gfx9_asm_mimg.s
llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg.txt

index 19eb39e..533af65 100644 (file)
@@ -97,7 +97,7 @@ def MIMG {
 class mimgopc <int gfx11, int gfx10m, int vi = gfx10m, int si = gfx10m> {
   field bits<8> GFX11 = gfx11;
   field bits<8> GFX10M = gfx10m; // GFX10minus for all but atomics
-  field bits<8> VI = vi; // VI is only used for atomic instructions
+  field bits<8> VI = vi; // VI is only used for atomic/sampler/gather instructions
   field bits<8> SI = si; // SI is only used for atomic instructions
   bit HAS_GFX11 = !ne(gfx11, MIMG.NOP);
   bit HAS_GFX10M = !ne(gfx10m, MIMG.NOP);
@@ -872,7 +872,7 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0>
 
 class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
                            RegisterClass src_rc, string dns="">
-  : MIMG_gfx6789 <op.GFX10M, (outs dst_rc:$vdata), dns> {
+  : MIMG_gfx6789 <op.VI, (outs dst_rc:$vdata), dns> {
   let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
                                 DMask:$dmask, UNorm:$unorm, CPol:$cpol,
                                 R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -1329,7 +1329,9 @@ defm IMAGE_GATHER4_C_L_O        : MIMG_Gather <mimgopc<MIMG.NOP, 0x5c>, AMDGPUSa
 defm IMAGE_GATHER4_C_B_O        : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x5d>, AMDGPUSample_c_b_o>;
 defm IMAGE_GATHER4_C_B_CL_O     : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x5e>, AMDGPUSample_c_b_cl_o>;
 defm IMAGE_GATHER4_C_LZ_O       : MIMG_Gather <mimgopc<0x37, 0x5f>, AMDGPUSample_c_lz_o>;
-defm IMAGE_GATHER4H             : MIMG_Gather <mimgopc<0x90, 0x61>, AMDGPUSample, 1, "image_gather4h">;
+
+let SubtargetPredicate = isGFX9Plus in
+defm IMAGE_GATHER4H             : MIMG_Gather <mimgopc<0x90, 0x61, 0x42>, AMDGPUSample, 1, "image_gather4h">;
 
 defm IMAGE_GET_LOD              : MIMG_Sampler <mimgopc<0x38, 0x60>, AMDGPUSample, 1, 0, 1, "image_get_lod">;
 
index e295f4f..88b5774 100644 (file)
@@ -2594,6 +2594,9 @@ image_sample_d_g16 v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3
 image_sample_d_o_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3
 // CHECK: error: instruction not supported on this GPU
 
+image_gather4h v[251:254], v[1:2], s[8:15], s[12:15] dmask:0x1
+// CHECK: error: instruction not supported on this GPU
+
 v_interp_mov_f32_e64 v255, p10, attr0.x
 // CHECK: error: e64 variant of this instruction is not supported
 
index 1e06a1d..cf2e7a2 100644 (file)
@@ -52,6 +52,9 @@ image_sample_d_g16 v[5:6], v[1:3], s[8:15], s[12:15] dmask:0x3
 image_sample_d_o_g16 v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x3
 // CHECK: error: instruction not supported on this GPU
 
+image_gather4h v[251:254], v[1:2], s[8:15], s[12:15] dmask:0x1
+// CHECK: error: instruction not supported on this GPU
+
 buffer_atomic_add_f32 v255, off, s[8:11], s3 offset:4095
 // CHECK: error: instruction not supported on this GPU
 
index a69e952..bf77eef 100644 (file)
@@ -96,6 +96,9 @@ ds_write_src2_b64 v1
 image_gather4 v[5:8], v1, s[8:15], s[12:15]
 // GFX90A: error: instruction not supported on this GPU
 
+image_gather4h v[251:254], v[1:2], s[8:15], s[12:15] dmask:0x1
+// GFX90A: error: instruction not supported on this GPU
+
 image_get_lod v5, v1, s[8:15], s[12:15]
 // GFX90A: error: instruction not supported on this GPU
 
index bfda147..21e1f3d 100644 (file)
@@ -6201,6 +6201,30 @@ image_gather4_c_lz_o v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x1 da
 image_gather4_c_lz_o v[5:6], v[1:4], s[8:15], s[12:15] dmask:0x1 d16
 // CHECK: [0x00,0x01,0x7c,0xf1,0x01,0x05,0x62,0x80]
 
+image_gather4h v[5:8], v[1:2], s[8:15], s[12:15] dmask:0x1
+// CHECK: encoding: [0x00,0x01,0x08,0xf1,0x01,0x05,0x62,0x00]
+
+image_gather4h v[5:8], v[254:255], s[8:15], s[12:15] dmask:0x2
+// CHECK: encoding: [0x00,0x02,0x08,0xf1,0xfe,0x05,0x62,0x00]
+
+image_gather4h v[5:8], v1, s[8:15], s[12:15] dmask:0x4 a16
+// CHECK: [0x00,0x84,0x08,0xf1,0x01,0x05,0x62,0x00]
+
+image_gather4h v[5:8], v255, s[8:15], s[12:15] dmask:0x8 a16
+// CHECK: encoding: [0x00,0x88,0x08,0xf1,0xff,0x05,0x62,0x00]
+
+image_gather4h v[5:8], v[1:3], s[8:15], s[12:15] dmask:0x4
+// CHECK: [0x00,0x04,0x08,0xf1,0x01,0x05,0x62,0x00]
+
+image_gather4h v[252:255], v[253:255], s[8:15], s[12:15] dmask:0x4
+// CHECK: [0x00,0x04,0x08,0xf1,0xfd,0xfc,0x62,0x00]
+
+image_gather4h v[5:6], v[1:2], s[92:99], s[96:99] dmask:0x4 a16 d16
+// CHECK: [0x00,0x84,0x08,0xf1,0x01,0x05,0x17,0x83]
+
+image_gather4h v[254:255], v[254:255], ttmp[8:15], ttmp[12:15] dmask:0x4 unorm glc slc a16 lwe da d16
+// CHECK: [0x00,0xf4,0x0a,0xf3,0xfe,0xfe,0xdd,0x83]
+
 image_get_lod v5, v1, s[8:15], s[12:15] dmask:0x1
 // CHECK: [0x00,0x01,0x80,0xf1,0x01,0x05,0x62,0x00]
 
index b6e74d3..21120a6 100644 (file)
 
 # CHECK: image_gather4_lz_o v[5:6], v[1:2], s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x5c,0xf1,0x01,0x05,0x62,0x80]
 0x00,0x01,0x5c,0xf1,0x01,0x05,0x62,0x80
+
+# CHECK: image_gather4h v[5:8], v1, s[8:15], s[12:15] dmask:0x1 ; encoding: [0x00,0x01,0x08,0xf1,0x01,0x05,0x62,0x00]
+0x00,0x01,0x08,0xf1,0x01,0x05,0x62,0x00
+
+# CHECK: image_gather4h v[5:8], v255, s[8:15], s[12:15] dmask:0x2 ; encoding: [0x00,0x02,0x08,0xf1,0xff,0x05,0x62,0x00]
+0x00,0x02,0x08,0xf1,0xff,0x05,0x62,0x00
+
+# CHECK: image_gather4h v[5:8], v255, s[8:15], s[12:15] dmask:0x4 ; encoding: [0x00,0x04,0x08,0xf1,0xff,0x05,0x62,0x00]
+0x00,0x04,0x08,0xf1,0xff,0x05,0x62,0x00
+
+# CHECK: image_gather4h v[5:8], v255, s[8:15], s[12:15] dmask:0x8 a16 ; encoding: [0x00,0x88,0x08,0xf1,0xff,0x05,0x62,0x00]
+0x00,0x88,0x08,0xf1,0xff,0x05,0x62,0x00
+
+# CHECK: image_gather4h v[5:6], v255, s[92:99], s[96:99] dmask:0x8 d16 ; encoding: [0x00,0x08,0x08,0xf1,0xff,0x05,0x17,0x83]
+0x00,0x08,0x08,0xf1,0xff,0x05,0x17,0x83
+
+# CHECK: image_gather4h v[252:255], v255, ttmp[8:15], ttmp[12:15] dmask:0x4 unorm glc slc a16 lwe da ; encoding: [0x00,0xf4,0x0a,0xf3,0xff,0xfc,0xdd,0x03]
+0x00,0xf4,0x0a,0xf3,0xff,0xfc,0xdd,0x03