defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
- let Src0RC64 = VRegSrc_32;
- let Src1RC64 = SCSrc_b32;
- let Src2RC64 = SCSrc_b32;
let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
- IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1,
- IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,
+ IntOpSelMods:$src1_modifiers, SSrc_b32:$src1,
+ IntOpSelMods:$src2_modifiers, SSrc_b32:$src2,
VGPR_32:$vdst_in, op_sel0:$op_sel);
let HasClamp = 0;
let HasExtVOP3DPP = 0;
; GCN-LABEL: {{^}}v_permlane16_b32_vll:
; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
-; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
; GFX10PLUS-NOT: v_readfirstlane_b32
-; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}}
define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
%v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
store i32 %v, i32 addrspace(1)* %out
; GCN-LABEL: {{^}}v_permlanex16_b32_vll:
; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
-; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
; GFX10PLUS-NOT: v_readfirstlane_b32
-; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
%v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
store i32 %v, i32 addrspace(1)* %out
v_permlane16_b32 v5, v1, -4.0, s3
// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xef,0x0d,0x00]
+v_permlane16_b32 v5, v1, 0xaf123456, s3
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
v_permlane16_b32 v5, v1, s2, s103
// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x9c,0x01]
v_permlane16_b32 v5, v1, s2, -4.0
// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xdc,0x03]
+v_permlane16_b32 v5, v1, s2, 0xaf123456
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
v_permlane16_b32 v5, v1, s2, s3 op_sel:[1,0]
// GFX10: encoding: [0x05,0x08,0x77,0xd7,0x01,0x05,0x0c,0x00]
v_permlanex16_b32 v5, v1, -4.0, s3
// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xef,0x0d,0x00]
+v_permlanex16_b32 v5, v1, 0xaf123456, s3
+// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
v_permlanex16_b32 v5, v1, s2, s103
// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x9c,0x01]
v_permlanex16_b32 v5, v1, s2, -4.0
// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xdc,0x03]
+v_permlanex16_b32 v5, v1, s2, 0xaf123456
+// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
v_permlanex16_b32 v5, v1, s2, s3 op_sel:[1,0]
// GFX10: encoding: [0x05,0x08,0x78,0xd7,0x01,0x05,0x0c,0x00]
v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1]
// GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01]
+v_permlane16_b32 v5, v1, 0xaf123456, s3
+// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, s2, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
v_permlanex16_b32 v5, v1, s2, s3
// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00]
v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1]
// GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01]
+v_permlanex16_b32 v5, v1, 0xaf123456, s3
+// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
+v_permlanex16_b32 v5, v1, s2, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15]
// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01]
# GFX10: v_permlane16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00]
0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00
+# GFX10: v_permlane16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf
+
# GFX10: v_permlane16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03]
0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03
# GFX10: v_permlane16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00]
0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00
+# GFX10: v_permlane16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf
+
# GFX10: v_permlane16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00]
0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00
# GFX10: v_permlanex16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00]
0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00
+# GFX10: v_permlanex16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf
+
# GFX10: v_permlanex16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03]
0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03
# GFX10: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00]
0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00
+# GFX10: v_permlanex16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf
+
# GFX10: v_permlanex16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00]
0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00