[AMDGPU] Fix operand class of v_ldexp_f16 src1
author: Joe Nash <Joseph.Nash@amd.com>
Mon, 12 Jun 2023 21:21:29 +0000 (17:21 -0400)
committer: Joe Nash <Joseph.Nash@amd.com>
Mon, 19 Jun 2023 14:43:10 +0000 (10:43 -0400)
Patch eece6ba283bd changed the src1 type of v_ldexp_f16 from i32 to
i16. Although src1 is semantically an i16, the hardware reads this operand
as an f16, which primarily enables floating-point inline constants.
Therefore, this patch changes the operand type to f16. It maintains the
current behavior where floating-point source modifiers are not allowed
on src1. The SDWA sext modifier continues to be allowed.
The asm and disasm test changes made in eece6ba283bd are reverted,
because floating-point inline constants are allowed.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D153169

llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s
llvm/test/MC/AMDGPU/gfx8_asm_vop3.s
llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt
llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt
llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt

index 35429a4..ea7ab16 100644 (file)
@@ -863,9 +863,18 @@ def :  divergent_i64_BinOp <xor, V_XOR_B32_e64>;
 // 16-Bit Operand Instructions
 //===----------------------------------------------------------------------===//
 
-def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I16> {
-  // The ldexp.f16 intrinsic expects a i32 src1 operand, though the hardware
-  // encoding treats src1 as an f16
+// The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware
+// encoding treats src1 as an f16
+def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
+  let Src1Mod = Int32InputMods;
+  let Src1ModDPP = IntVRegInputMods;
+  let Src1ModVOP3DPP = IntVRegInputMods;
+  // SDWA sext is the only modifier allowed.
+  let HasSrc1IntMods = 1;
+  let HasSrc1FloatMods = 0;
+  let Src1ModSDWA = Int16SDWAInputMods;
+}
+def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
   let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
   let Src1DPP = VGPR_32_Lo128;
   let Src1ModDPP = IntT16VRegInputMods;
@@ -874,9 +883,9 @@ def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I16> {
 let isReMaterializable = 1 in {
 let FPDPRounding = 1 in {
   let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts]  in
-    defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I16, any_fldexp>;
+    defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
   let SubtargetPredicate = HasTrue16BitInsts in
-    defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16, any_fldexp>;
+    defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
 } // End FPDPRounding = 1
 // FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions
 defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
@@ -899,6 +908,21 @@ defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>;
 } // End isCommutable = 1
 } // End isReMaterializable = 1
 
+class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
+  (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+               (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
+  (inst $src0_modifiers, $src0,
+        $src1_modifiers, $src1,
+        $clamp, /* clamp */
+        $omod /* omod */)
+>;
+
+let OtherPredicates = [NotHasTrue16BitInsts] in
+def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
+
+let OtherPredicates = [HasTrue16BitInsts] in
+def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
+
 let SubtargetPredicate = isGFX11Plus in {
   let isCommutable = 1 in {
     defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
index 6ea135c..b1b5400 100644 (file)
@@ -12931,11 +12931,11 @@ v_ldexp_f16_e64 v5, v1, 0
 v_ldexp_f16_e64 v5, v1, -1
 // GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00]
 
-v_ldexp_f16_e64 v5, v1, 0x3800
-// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00]
+v_ldexp_f16_e64 v5, v1, 0.5
+// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, -4.0
-// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00]
+// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00]
 
 v_ldexp_f16_e64 v5, -v1, v2
 // GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x20]
index ffb8310..43c7161 100644 (file)
@@ -734,7 +734,7 @@ v_ldexp_f16_e64 v5, ttmp15, src_scc
 // GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00]
 
 v_ldexp_f16_e64 v5, m0, 0.5
-// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00]
 
 v_ldexp_f16_e64 v5, exec_lo, -1
 // GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00]
index d46a97f..d4c31f1 100644 (file)
@@ -1,5 +1,4 @@
-// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s
-// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding -filetype=null 2>&1 %s | FileCheck -check-prefix=ERR --implicit-check-not=error %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s
 
 v_interp_p1_f32_e64 v5, v2, attr0.x
 // CHECK: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x00]
@@ -12827,10 +12826,10 @@ v_ldexp_f16_e64 v5, v1, -1
 // CHECK: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, 0.5
-// ERR: [[@LINE-1]]:25: error: literal operands are not supported
+// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, -4.0
-// ERR: [[@LINE-1]]:25: error: literal operands are not supported
+// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, src_vccz
 // CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf7,0x01,0x00]
index 34d0d77..8781a01 100644 (file)
@@ -1,5 +1,4 @@
-// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s
-// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=ERR --implicit-check-not=error %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s
 
 v_interp_p1_f32_e64 v5, v2, attr0.x
 // CHECK: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x00]
@@ -11240,10 +11239,10 @@ v_ldexp_f16_e64 v5, v1, -1
 // CHECK: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, 0.5
-// ERR: [[@LINE-1]]:25: error: literal operands are not supported
+// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, -4.0
-// ERR: [[@LINE-1]]:25: error: literal operands are not supported
+// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, src_vccz
 // CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf7,0x01,0x00]
index c5cd6f6..0785ba2 100644 (file)
 # GFX10: v_ldexp_f16_e64 v5, v1, -1              ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00
 
-# GFX10: v_ldexp_f16_e64 v5, v1, 0xc400          ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00]
+# GFX10: v_ldexp_f16_e64 v5, v1, -4.0            ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00
 
 # GFX10: v_ldexp_f16_e64 v5, v1, 0               ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x01,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x01,0x01,0x01,0x00
 
-# GFX10: v_ldexp_f16_e64 v5, v1, 0x3800          ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00]
+# GFX10: v_ldexp_f16_e64 v5, v1, 0.5             ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00
 
 # GFX10: v_ldexp_f16_e64 v5, v1, exec_hi         ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x00,0x00]
index 4e430a8..3141e8f 100644 (file)
 # GFX11: v_ldexp_f16_e64 v5, ttmp15, src_scc     ; encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00
 
-# GFX11: v_ldexp_f16_e64 v5, m0, 0x3800          ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# GFX11: v_ldexp_f16_e64 v5, m0, 0.5             ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00
 
 # GFX11: v_ldexp_f16_e64 v5, exec_lo, -1         ; encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00]
index a0277c7..2b07d62 100644 (file)
 # CHECK: v_ldexp_f16_e64 v5, v1, -1              ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00
 
-# CHECK: v_ldexp_f16_e64 v5, v1, 0x3800          ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00]
+# CHECK: v_ldexp_f16_e64 v5, v1, 0.5             ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00
 
-# CHECK: v_ldexp_f16_e64 v5, v1, 0xc400          ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00]
+# CHECK: v_ldexp_f16_e64 v5, v1, -4.0            ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00
 
 # CHECK: v_ldexp_f16_e64 v5, -v1, v2             ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x05,0x02,0x20]
index c2ac84b..e3ed977 100644 (file)
 # CHECK: v_ldexp_f16_e64 v5, v1, -1              ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00
 
-# CHECK: v_ldexp_f16_e64 v5, v1, 0x3800          ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00]
+# CHECK: v_ldexp_f16_e64 v5, v1, 0.5             ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00
 
-# CHECK: v_ldexp_f16_e64 v5, v1, 0xc400          ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00]
+# CHECK: v_ldexp_f16_e64 v5, v1, -4.0            ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00
 
 # CHECK: v_ldexp_f16_e64 v5, -v1, v2             ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x05,0x02,0x20]