From: Dmitry Preobrazhensky Date: Wed, 22 Jul 2020 14:16:59 +0000 (+0300) Subject: [AMDGPU][MC] Corrected decoding of 16-bit literals X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0b8fd77ad953e6968242a131a5ed3b881c19daac;p=platform%2Fupstream%2Fllvm.git [AMDGPU][MC] Corrected decoding of 16-bit literals 16-bit literals are encoded as 32-bit values. If high 16-bits of the value is 0xFFFF, the decoded instruction cannot be reassembled. For example, the following code 0xff,0x04,0x04,0x52,0xcd,0xab,0xff,0xff was decoded as v_mul_lo_u16_e32 v2, 0xffffabcd, v2 However this literal is actually a 64-bit constant 0x00000000ffffabcd which violates requirements described in the documentation - the truncation is not safe. This change corrects decoding to make reassembly possible. Reviewers: arsenm, rampitec Differential Revision: https://reviews.llvm.org/D84098 --- diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 00bf404..257638b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -391,10 +391,12 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O) { int16_t SImm = static_cast(Imm); - if (isInlinableIntLiteral(SImm)) + if (isInlinableIntLiteral(SImm)) { O << SImm; - else - O << formatHex(static_cast(Imm)); + } else { + uint64_t Imm16 = static_cast(Imm); + O << formatHex(Imm16); + } } void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, @@ -425,8 +427,10 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, else if (Imm == 0x3118) { assert(STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]); O << "0.15915494"; - } else - O << formatHex(static_cast(Imm)); + } else { + uint64_t Imm16 = static_cast(Imm); + O << formatHex(Imm16); + } } void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 6dfd23e..78a66a7 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -109,8 +109,6 @@ private: raw_ostream &O); void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); - void printImmediateIntV216(uint32_t Imm, const MCSubtargetInfo &STI, - raw_ostream &O); void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI, diff --git a/llvm/test/CodeGen/AMDGPU/add.i16.ll b/llvm/test/CodeGen/AMDGPU/add.i16.ll index 9884829..619cfb8 100644 --- a/llvm/test/CodeGen/AMDGPU/add.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/add.i16.ll @@ -36,7 +36,7 @@ define amdgpu_kernel void @v_test_add_i16_constant(i16 addrspace(1)* %out, i16 a ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_add_i16_neg_constant: ; VI: flat_load_ushort [[A:v[0-9]+]] -; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xfffffcb3, [[A]] +; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xfcb3, [[A]] ; VI-NEXT: buffer_store_short [[ADD]] define amdgpu_kernel void @v_test_add_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll index 09f9abc..5e85f88 100644 --- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll @@ -89,7 +89,7 @@ define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %ou ; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0xfc21fcb3{{$}} ; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]] -; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}} +; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfcb3, v{{[0-9]+}} ; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21 ; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/imm16.ll b/llvm/test/CodeGen/AMDGPU/imm16.ll index 534b04e..8aebe29 100644 --- a/llvm/test/CodeGen/AMDGPU/imm16.ll +++ b/llvm/test/CodeGen/AMDGPU/imm16.ll @@ -1438,7 +1438,7 @@ define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xffffb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1447,7 +1447,7 @@ define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) { ; VI-LABEL: mul_inline_imm_neg_0.5_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ -1510,7 +1510,7 @@ define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xffffbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1519,7 +1519,7 @@ define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) { ; VI-LABEL: mul_inline_imm_neg_1.0_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ -1583,7 +1583,7 @@ define void @shl_inline_imm_neg_2.0_i16(i16 addrspace(1)* %out, i16 %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] +; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1656,7 +1656,7 @@ define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xffffc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1665,7 +1665,7 @@ define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) { ; VI-LABEL: mul_inline_imm_neg_4.0_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] diff --git a/llvm/test/CodeGen/AMDGPU/sub.i16.ll b/llvm/test/CodeGen/AMDGPU/sub.i16.ll index 5f98b7b..7fe003f 100644 --- a/llvm/test/CodeGen/AMDGPU/sub.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sub.i16.ll @@ -22,7 +22,7 @@ define amdgpu_kernel void @v_test_sub_i16(i16 addrspace(1)* %out, i16 addrspace( ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_sub_i16_constant: ; VI: flat_load_ushort [[A:v[0-9]+]] -; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xffffff85, [[A]] +; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xff85, [[A]] ; VI-NEXT: buffer_store_short [[ADD]] define amdgpu_kernel void @v_test_sub_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll index 43536131..30b681f 100644 --- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -194,7 +194,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v2, 0xffffff85, v0 +; VI-NEXT: v_add_u16_e32 v2, 0xff85, v0 ; VI-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 diff --git a/llvm/test/MC/Disassembler/AMDGPU/literal16_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/literal16_vi.txt index a3cdae3..466932e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/literal16_vi.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/literal16_vi.txt @@ -34,16 +34,22 @@ 0xff 0x06 0x02 0x3e 0x00 0x01 0x00 0x00 # non-zero unused bits in constant -# VI: v_add_f16_e32 v1, 0x10041, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x01,0x00] +# VI: v_add_f16_e32 v1, 0x41, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x01,0x00] 0xff 0x06 0x02 0x3e 0x41 0x00 0x01 0x00 -# VI: v_add_f16_e32 v1, 0x1000041, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x00,0x01] +# VI: v_add_f16_e32 v1, 0x41, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x00,0x01] 0xff 0x06 0x02 0x3e 0x41 0x00 0x00 0x01 # FIXME: This should be able to round trip with literal after instruction # VI: v_add_f16_e32 v1, 0, v3 ; encoding: [0x80,0x06,0x02,0x3e] 0xff 0x06 0x02 0x3e 0x00 0x00 0x00 0x00 +# VI: v_add_f16_e32 v1, 0xffcd, v3 ; encoding: [0xff,0x06,0x02,0x3e,0xcd,0xff,0xff,0xff] +0xff 0x06 0x02 0x3e 0xcd 0xff 0xff 0xff + +# VI: v_mul_lo_u16_e32 v2, 0xffcd, v2 ; encoding: [0xff,0x04,0x04,0x52,0xcd,0xff,0xff,0xff] +0xff 0x04 0x04 0x52 0xcd 0xff 0xff 0xff + # VI: v_madmk_f16 v1, v2, 0x41, v3 ; encoding: [0x02,0x07,0x02,0x48,0x41,0x00,0x00,0x00] 0x02 0x07 0x02 0x48 0x41 0x00 0x00 0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/vop1.txt index 3e65f61..2d481d24 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vop1.txt @@ -243,7 +243,7 @@ # CHECK: v_cvt_i32_f64_e32 v2, 0x4236b732 ; encoding: [0xff,0x06,0x04,0x7e,0x32,0xb7,0x36,0x42] 0xff 0x06 0x04 0x7e 0x32 0xb7 0x36 0x42 -# CHECK: v_cvt_f16_u16_e32 v123, 0x3ade68b1 ; encoding: [0xff,0x72,0xf6,0x7e,0xb1,0x68,0xde,0x3a] +# CHECK: v_cvt_f16_u16_e32 v123, 0x68b1 ; encoding: [0xff,0x72,0xf6,0x7e,0xb1,0x68,0xde,0x3a] 0xff 0x72 0xf6 0x7e 0xb1 0x68 0xde 0x3a # CHECK: v_cvt_f16_i16_e32 v123, 0x21c2 ; encoding: [0xff,0x74,0xf6,0x7e,0xc2,0x21,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt b/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt index 6c4cdea..d9d8972 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt @@ -47,3 +47,12 @@ # GFX10: v_pk_add_u16 v1, 0xffffff9c, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff] 0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff + +# GFX10: v_add_nc_i16 v5, v1, 0xcdab ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xff,0x01,0x00,0xab,0xcd,0xff,0xff] +0x05,0x00,0x0d,0xd7,0x01,0xff,0x01,0x00,0xab,0xcd,0xff,0xff + +# GFX10: v_ceil_f16_e64 v255, 0xabcd clamp ; encoding: [0xff,0x80,0xdc,0xd5,0xff,0x00,0x00,0x00,0xcd,0xab,0xff,0xff] +0xff,0x80,0xdc,0xd5,0xff,0x00,0x00,0x00,0xcd,0xab,0xff,0xff + +# GFX10: v_min_u16_e64 v5, v1, 0xabcd ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0xcd,0xab,0xff,0xff] +0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0xcd,0xab,0xff,0xff