[AMDGPU][MC] Corrected decoding of 16-bit literals
authorDmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>
Wed, 22 Jul 2020 14:16:59 +0000 (17:16 +0300)
committerDmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>
Wed, 22 Jul 2020 14:20:43 +0000 (17:20 +0300)
16-bit literals are encoded as 32-bit values. If high 16-bits of the value is 0xFFFF, the decoded instruction cannot be reassembled.

For example, the following code

0xff,0x04,0x04,0x52,0xcd,0xab,0xff,0xff

was decoded as

v_mul_lo_u16_e32 v2, 0xffffabcd, v2

However this literal is actually a 64-bit constant 0x00000000ffffabcd which violates requirements described in the documentation - the truncation is not safe.

This change corrects decoding to make reassembly possible.

Reviewers: arsenm, rampitec

Differential Revision: https://reviews.llvm.org/D84098

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
llvm/test/CodeGen/AMDGPU/add.i16.ll
llvm/test/CodeGen/AMDGPU/add.v2i16.ll
llvm/test/CodeGen/AMDGPU/imm16.ll
llvm/test/CodeGen/AMDGPU/sub.i16.ll
llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
llvm/test/MC/Disassembler/AMDGPU/literal16_vi.txt
llvm/test/MC/Disassembler/AMDGPU/vop1.txt
llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt

index 00bf404..257638b 100644 (file)
@@ -391,10 +391,12 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
                                             const MCSubtargetInfo &STI,
                                             raw_ostream &O) {
   int16_t SImm = static_cast<int16_t>(Imm);
-  if (isInlinableIntLiteral(SImm))
+  if (isInlinableIntLiteral(SImm)) {
     O << SImm;
-  else
-    O << formatHex(static_cast<uint64_t>(Imm));
+  } else {
+    uint64_t Imm16 = static_cast<uint16_t>(Imm);
+    O << formatHex(Imm16);
+  }
 }
 
 void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
@@ -425,8 +427,10 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
   else if (Imm == 0x3118) {
     assert(STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]);
     O << "0.15915494";
-  } else
-    O << formatHex(static_cast<uint64_t>(Imm));
+  } else {
+    uint64_t Imm16 = static_cast<uint16_t>(Imm);
+    O << formatHex(Imm16);
+  }
 }
 
 void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
index 6dfd23e..78a66a7 100644 (file)
@@ -109,8 +109,6 @@ private:
                            raw_ostream &O);
   void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
                         raw_ostream &O);
-  void printImmediateIntV216(uint32_t Imm, const MCSubtargetInfo &STI,
-                             raw_ostream &O);
   void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
                           raw_ostream &O);
   void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
index 9884829..619cfb8 100644 (file)
@@ -36,7 +36,7 @@ define amdgpu_kernel void @v_test_add_i16_constant(i16 addrspace(1)* %out, i16 a
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
 ; GCN-LABEL: {{^}}v_test_add_i16_neg_constant:
 ; VI: flat_load_ushort [[A:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xfffffcb3, [[A]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xfcb3, [[A]]
 ; VI-NEXT: buffer_store_short [[ADD]]
 define amdgpu_kernel void @v_test_add_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
index 09f9abc..5e85f88 100644 (file)
@@ -89,7 +89,7 @@ define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %ou
 ; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0xfc21fcb3{{$}}
 ; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
 
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}}
+; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfcb3, v{{[0-9]+}}
 ; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21
 ; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
index 534b04e..8aebe29 100644 (file)
@@ -1438,7 +1438,7 @@ define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT:    v_mul_lo_u16_e64 v2, 0xffffb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
@@ -1447,7 +1447,7 @@ define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
 ; VI-LABEL: mul_inline_imm_neg_0.5_i16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xffffb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
 ; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
@@ -1510,7 +1510,7 @@ define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT:    v_mul_lo_u16_e64 v2, 0xffffbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
@@ -1519,7 +1519,7 @@ define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
 ; VI-LABEL: mul_inline_imm_neg_1.0_i16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xffffbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
 ; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
@@ -1583,7 +1583,7 @@ define void @shl_inline_imm_neg_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT:    v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
+; GFX10-NEXT:    v_lshlrev_b16_e64 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
@@ -1656,7 +1656,7 @@ define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
-; GFX10-NEXT:    v_mul_lo_u16_e64 v2, 0xffffc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
@@ -1665,7 +1665,7 @@ define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
 ; VI-LABEL: mul_inline_imm_neg_4.0_i16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xffffc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
 ; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
index 5f98b7b..7fe003f 100644 (file)
@@ -22,7 +22,7 @@ define amdgpu_kernel void @v_test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
 ; GCN-LABEL: {{^}}v_test_sub_i16_constant:
 ; VI: flat_load_ushort [[A:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xffffff85, [[A]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xff85, [[A]]
 ; VI-NEXT: buffer_store_short [[ADD]]
 define amdgpu_kernel void @v_test_sub_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
index 4353613..30b681f 100644 (file)
@@ -194,7 +194,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_add_u16_e32 v2, 0xffffff85, v0
+; VI-NEXT:    v_add_u16_e32 v2, 0xff85, v0
 ; VI-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-NEXT:    v_or_b32_e32 v0, v2, v0
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
index a3cdae3..466932e 100644 (file)
 0xff 0x06 0x02 0x3e 0x00 0x01 0x00 0x00
 
 # non-zero unused bits in constant
-# VI: v_add_f16_e32 v1, 0x10041, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x01,0x00]
+# VI: v_add_f16_e32 v1, 0x41, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x01,0x00]
 0xff 0x06 0x02 0x3e 0x41 0x00 0x01 0x00
 
-# VI: v_add_f16_e32 v1, 0x1000041, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x00,0x01]
+# VI: v_add_f16_e32 v1, 0x41, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x00,0x01]
 0xff 0x06 0x02 0x3e 0x41 0x00 0x00 0x01
 
 # FIXME: This should be able to round trip with literal after instruction
 # VI: v_add_f16_e32 v1, 0, v3 ; encoding: [0x80,0x06,0x02,0x3e]
 0xff 0x06 0x02 0x3e 0x00 0x00 0x00 0x00
 
+# VI: v_add_f16_e32 v1, 0xffcd, v3 ; encoding: [0xff,0x06,0x02,0x3e,0xcd,0xff,0xff,0xff]
+0xff 0x06 0x02 0x3e 0xcd 0xff 0xff 0xff
+
+# VI: v_mul_lo_u16_e32 v2, 0xffcd, v2 ; encoding: [0xff,0x04,0x04,0x52,0xcd,0xff,0xff,0xff]
+0xff 0x04 0x04 0x52 0xcd 0xff 0xff 0xff
+
 # VI: v_madmk_f16 v1, v2, 0x41, v3 ; encoding: [0x02,0x07,0x02,0x48,0x41,0x00,0x00,0x00]
 0x02 0x07 0x02 0x48 0x41 0x00 0x00 0x00
 
index 3e65f61..2d481d2 100644 (file)
 # CHECK: v_cvt_i32_f64_e32 v2, 0x4236b732 ; encoding: [0xff,0x06,0x04,0x7e,0x32,0xb7,0x36,0x42]
 0xff 0x06 0x04 0x7e 0x32 0xb7 0x36 0x42
 
-# CHECK: v_cvt_f16_u16_e32 v123, 0x3ade68b1 ; encoding: [0xff,0x72,0xf6,0x7e,0xb1,0x68,0xde,0x3a]
+# CHECK: v_cvt_f16_u16_e32 v123, 0x68b1 ; encoding: [0xff,0x72,0xf6,0x7e,0xb1,0x68,0xde,0x3a]
 0xff 0x72 0xf6 0x7e 0xb1 0x68 0xde 0x3a
 
 # CHECK: v_cvt_f16_i16_e32 v123, 0x21c2  ; encoding: [0xff,0x74,0xf6,0x7e,0xc2,0x21,0x00,0x00]
index 6c4cdea..d9d8972 100644 (file)
 
 # GFX10: v_pk_add_u16 v1, 0xffffff9c, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff]
 0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff
+
+# GFX10: v_add_nc_i16 v5, v1, 0xcdab ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xff,0x01,0x00,0xab,0xcd,0xff,0xff]
+0x05,0x00,0x0d,0xd7,0x01,0xff,0x01,0x00,0xab,0xcd,0xff,0xff
+
+# GFX10: v_ceil_f16_e64 v255, 0xabcd clamp ; encoding: [0xff,0x80,0xdc,0xd5,0xff,0x00,0x00,0x00,0xcd,0xab,0xff,0xff]
+0xff,0x80,0xdc,0xd5,0xff,0x00,0x00,0x00,0xcd,0xab,0xff,0xff
+
+# GFX10: v_min_u16_e64 v5, v1, 0xabcd ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0xcd,0xab,0xff,0xff]
+0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0xcd,0xab,0xff,0xff