From 3f0cdc7a11b30d67fa9e66d318c760c4434bd74e Mon Sep 17 00:00:00 2001 From: Konstantin Zhuravlyov Date: Thu, 17 Nov 2016 04:00:46 +0000 Subject: [PATCH] [AMDGPU] Promote f16/i16 conversions to f32/i32 llvm-svn: 287201 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 60 +++++-------------------------- llvm/lib/Target/AMDGPU/SIISelLowering.h | 6 ---- llvm/test/CodeGen/AMDGPU/fp_to_sint.ll | 6 ++-- llvm/test/CodeGen/AMDGPU/fp_to_uint.ll | 10 +++--- llvm/test/CodeGen/AMDGPU/fptosi.f16.ll | 29 +++++++-------- llvm/test/CodeGen/AMDGPU/fptoui.f16.ll | 44 +++++++++++------------ llvm/test/CodeGen/AMDGPU/sitofp.f16.ll | 30 ++++++---------- llvm/test/CodeGen/AMDGPU/uitofp.f16.ll | 32 ++++++++--------- 8 files changed, 75 insertions(+), 142 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1f90505..b72415b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -270,10 +270,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_FP16, MVT::i16, Promote); AddPromotedToType(ISD::FP_TO_FP16, MVT::i16, MVT::i32); - setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); // F16 - Constant Actions. setOperationAction(ISD::ConstantFP, MVT::f16, Custom); @@ -287,6 +287,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // F16 - VOP1 Actions. setOperationAction(ISD::FCOS, MVT::f16, Promote); setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::f16, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::f16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::f16, Promote); // F16 - VOP2 Actions. setOperationAction(ISD::BR_CC, MVT::f16, Expand); @@ -1828,12 +1832,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantFP: return lowerConstantFP(Op, DAG); - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - return lowerFpToInt(Op, DAG); - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - return lowerIntToFp(Op, DAG); } return SDValue(); } @@ -2045,48 +2043,6 @@ SDValue SITargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue SITargetLowering::lowerFpToInt(SDValue Op, SelectionDAG &DAG) const { - EVT DstVT = Op.getValueType(); - EVT SrcVT = Op.getOperand(0).getValueType(); - if (DstVT == MVT::i64) { - return Op.getOpcode() == ISD::FP_TO_SINT ? - AMDGPUTargetLowering::LowerFP_TO_SINT(Op, DAG) : - AMDGPUTargetLowering::LowerFP_TO_UINT(Op, DAG); - } - - if (SrcVT == MVT::f16) - return Op; - - SDLoc DL(Op); - SDValue OrigSrc = Op.getOperand(0); - SDValue FPRoundFlag = DAG.getIntPtrConstant(0, DL); - SDValue FPRoundSrc = - DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, OrigSrc, FPRoundFlag); - - return DAG.getNode(Op.getOpcode(), DL, DstVT, FPRoundSrc); -} - -SDValue SITargetLowering::lowerIntToFp(SDValue Op, SelectionDAG &DAG) const { - EVT DstVT = Op.getValueType(); - EVT SrcVT = Op.getOperand(0).getValueType(); - if (SrcVT == MVT::i64) { - return Op.getOpcode() == ISD::SINT_TO_FP ? - AMDGPUTargetLowering::LowerSINT_TO_FP(Op, DAG) : - AMDGPUTargetLowering::LowerUINT_TO_FP(Op, DAG); - } - - if (DstVT == MVT::f16) - return Op; - - SDLoc DL(Op); - SDValue OrigSrc = Op.getOperand(0); - SDValue SExtOrZExtOrTruncSrc = Op.getOpcode() == ISD::SINT_TO_FP ? - DAG.getSExtOrTrunc(OrigSrc, DL, MVT::i32) : - DAG.getZExtOrTrunc(OrigSrc, DL, MVT::i32); - - return DAG.getNode(Op.getOpcode(), DL, DstVT, SExtOrZExtOrTruncSrc); -} - SDValue SITargetLowering::getSegmentAperture(unsigned AS, SelectionDAG &DAG) const { SDLoc SL; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 32a3267..2cbfe11 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -56,12 +56,6 @@ class SITargetLowering final : public AMDGPUTargetLowering { /// \brief Custom lowering for ISD::ConstantFP. SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const; - /// \brief Custom lowering for ISD::FP_TO_SINT, ISD::FP_TO_UINT. - SDValue lowerFpToInt(SDValue Op, SelectionDAG &DAG) const; - - /// \brief Custom lowering for ISD::SINT_TO_FP, ISD::UINT_TO_FP. - SDValue lowerIntToFp(SDValue Op, SelectionDAG &DAG) const; - SDValue getSegmentAperture(unsigned AS, SelectionDAG &DAG) const; SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll index a273f34..381c375 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll @@ -249,10 +249,8 @@ define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { } ; FUNC-LABEL: {{^}}fp_to_sint_f32_i16: -; SI: v_cvt_i32_f32_e32 v[[VAL:[0-9]+]], s{{[0-9]+}} -; VI: v_cvt_f16_f32_e32 v[[IN_F16:[0-9]+]], s{{[0-9]+}} -; VI: v_cvt_i16_f16_e32 v[[VAL:[0-9]+]], v[[IN_F16]] -; SI: buffer_store_short v[[VAL]] +; GCN: v_cvt_i32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}} +; GCN: buffer_store_short [[VAL]] define void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 { %sint = fptosi float %in to i16 store i16 %sint, i16 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll index d1fc9fa..d089c29 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll @@ -240,10 +240,12 @@ define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { } ; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i16: -; SI: v_cvt_u32_f32_e32 v[[VAL:[0-9]+]], s{{[0-9]+}} -; VI: v_cvt_f16_f32_e32 v[[IN_F16:[0-9]+]], s{{[0-9]+}} -; VI: v_cvt_u16_f16_e32 v[[VAL:[0-9]+]], v[[IN_F16]] -; GCN: buffer_store_short v[[VAL]] +; The reason different instructions are used on SI and VI is because for +; SI fp_to_uint is legalized by the type legalizer and for VI it is +; legalized by the dag legalizer and they legalize fp_to_uint differently. +; SI: v_cvt_u32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}} +; VI: v_cvt_i32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}} +; GCN: buffer_store_short [[VAL]] define void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float %in) #0 { %uint = fptoui float %in to i16 store i16 %uint, i16 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll index 5991efb..3c973e0 100644 --- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll @@ -3,9 +3,8 @@ ; GCN-LABEL: {{^}}fptosi_f16_to_i16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] -; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] -; SI: v_cvt_i32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]] -; VI: v_cvt_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]] +; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] +; GCN: v_cvt_i32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]] ; GCN: buffer_store_short v[[R_I16]] ; GCN: s_endpgm define void @fptosi_f16_to_i16( @@ -54,12 +53,10 @@ entry: ; GCN-LABEL: {{^}}fptosi_v2f16_to_v2i16 ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] -; SI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] -; SI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] -; VI: v_cvt_i16_f16_e32 v[[R_I16_0:[0-9]+]], v[[A_V2_F16]] -; VI: v_cvt_i16_f16_e32 v[[R_I16_1:[0-9]+]], v[[A_F16_1]] +; GCN: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] +; GCN: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] +; GCN: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] +; GCN: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] ; GCN: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]] ; GCN: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_HI]], v[[R_I16_LO]] @@ -76,13 +73,13 @@ entry: } ; GCN-LABEL: {{^}}fptosi_v2f16_to_v2i32 -; GCN: buffer_load_dword -; GCN: v_cvt_f32_f16_e32 -; GCN: v_cvt_f32_f16_e32 -; GCN: v_cvt_i32_f32_e32 -; GCN: v_cvt_i32_f32_e32 -; GCN: buffer_store_dwordx2 -; GCN: s_endpgm +; GCN: buffer_load_dword +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_i32_f32_e32 +; GCN: v_cvt_i32_f32_e32 +; GCN: buffer_store_dwordx2 +; GCN: s_endpgm define void @fptosi_v2f16_to_v2i32( <2 x i32> addrspace(1)* %r, <2 x half> addrspace(1)* %a) { diff --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll index 592c15a..a74d6d3 100644 --- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll @@ -3,9 +3,9 @@ ; GCN-LABEL: {{^}}fptoui_f16_to_i16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] -; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] +; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; SI: v_cvt_u32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]] -; VI: v_cvt_u16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]] +; VI: v_cvt_i32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]] ; GCN: buffer_store_short v[[R_I16]] ; GCN: s_endpgm define void @fptoui_f16_to_i16( @@ -52,20 +52,18 @@ entry: } ; GCN-LABEL: {{^}}fptoui_v2f16_to_v2i16 -; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] -; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] -; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; SI: v_cvt_u32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] -; SI: v_cvt_u32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] -; VI: v_cvt_u16_f16_e32 v[[R_I16_0:[0-9]+]], v[[A_V2_F16]] -; VI: v_cvt_u16_f16_e32 v[[R_I16_1:[0-9]+]], v[[A_F16_1]] -; VI: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]] -; GCN: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]] -; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_HI]], v[[R_I16_0]] -; VI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_HI]], v[[R_I16_LO]] -; GCN: buffer_store_dword v[[R_V2_I16]] -; GCN: s_endpgm +; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] +; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] +; SI: v_cvt_u32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] +; SI: v_cvt_u32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] +; VI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] +; VI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] +; GCN: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]] +; GCN: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_HI]], v[[R_I16_0]] +; GCN: buffer_store_dword v[[R_V2_I16]] +; GCN: s_endpgm define void @fptoui_v2f16_to_v2i16( <2 x i16> addrspace(1)* %r, <2 x half> addrspace(1)* %a) { @@ -77,13 +75,13 @@ entry: } ; GCN-LABEL: {{^}}fptoui_v2f16_to_v2i32 -; GCN: buffer_load_dword -; GCN: v_cvt_f32_f16_e32 -; GCN: v_cvt_f32_f16_e32 -; GCN: v_cvt_u32_f32_e32 -; GCN: v_cvt_u32_f32_e32 -; GCN: buffer_store_dwordx2 -; GCN: s_endpgm +; GCN: buffer_load_dword +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_u32_f32_e32 +; GCN: v_cvt_u32_f32_e32 +; GCN: buffer_store_dwordx2 +; GCN: s_endpgm define void @fptoui_v2f16_to_v2i32( <2 x i32> addrspace(1)* %r, <2 x half> addrspace(1)* %a) { diff --git a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll index 6c1d8ec..0910d77 100644 --- a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll @@ -3,9 +3,8 @@ ; GCN-LABEL: {{^}}sitofp_i16_to_f16 ; GCN: buffer_load_{{sshort|ushort}} v[[A_I16:[0-9]+]] -; SI: v_cvt_f32_i32_e32 v[[A_F32:[0-9]+]], v[[A_I16]] -; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] -; VI: v_cvt_f16_i16_e32 v[[R_F16:[0-9]+]], v[[A_I16]] +; GCN: v_cvt_f32_i32_e32 v[[A_F32:[0-9]+]], v[[A_I16]] +; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define void @sitofp_i16_to_f16( @@ -37,22 +36,15 @@ entry: ; f16 = sitofp i64 is in sint_to_fp.i64.ll ; GCN-LABEL: {{^}}sitofp_v2i16_to_v2f16 -; GCN: buffer_load_dword v[[A_V2_I16:[0-9]+]] -; SI: v_bfe_i32 v[[A_I16_0:[0-9]+]], v[[A_V2_I16]], 0, 16 -; SI: v_ashrrev_i32_e32 v[[A_I16_1:[0-9]+]], 16, v[[A_V2_I16]] -; SI: v_cvt_f32_i32_e32 v[[A_F32_1:[0-9]+]], v[[A_I16_1]] -; SI: v_cvt_f32_i32_e32 v[[A_F32_0:[0-9]+]], v[[A_I16_0]] -; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]] -; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]] -; VI: v_lshrrev_b32_e32 v[[A_I16_1:[0-9]+]], 16, v[[A_V2_I16]] -; VI: v_cvt_f16_i16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_I16]] -; VI: v_cvt_f16_i16_e32 v[[R_F16_1:[0-9]+]], v[[A_I16_1]] -; VI: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] -; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] -; SI: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] -; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] -; GCN: buffer_store_dword v[[R_V2_F16]] -; GCN: s_endpgm +; GCN: buffer_load_dword +; GCN: v_cvt_f32_i32_e32 +; GCN: v_cvt_f32_i32_e32 +; GCN: v_cvt_f16_f32_e32 +; GCN: v_cvt_f16_f32_e32 +; GCN-DAG: v_lshlrev_b32_e32 +; GCN-DAG: v_or_b32_e32 +; GCN: buffer_store_dword +; GCN: s_endpgm define void @sitofp_v2i16_to_v2f16( <2 x half> addrspace(1)* %r, <2 x i16> addrspace(1)* %a) { diff --git a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll index 62131e7..73d9e8c 100644 --- a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll @@ -4,8 +4,8 @@ ; GCN-LABEL: {{^}}uitofp_i16_to_f16 ; GCN: buffer_load_ushort v[[A_I16:[0-9]+]] ; SI: v_cvt_f32_u32_e32 v[[A_F32:[0-9]+]], v[[A_I16]] -; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] -; VI: v_cvt_f16_u16_e32 v[[R_F16:[0-9]+]], v[[A_I16]] +; VI: v_cvt_f32_i32_e32 v[[A_F32:[0-9]+]], v[[A_I16]] +; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define void @uitofp_i16_to_f16( @@ -37,22 +37,18 @@ entry: ; f16 = uitofp i64 is in uint_to_fp.i64.ll ; GCN-LABEL: {{^}}uitofp_v2i16_to_v2f16 -; GCN: buffer_load_dword v[[A_V2_I16:[0-9]+]] -; SI: s_mov_b32 s[[MASK:[0-9]+]], 0xffff{{$}} -; SI: v_and_b32_e32 v[[A_I16_0:[0-9]+]], s[[MASK]], v[[A_V2_I16]] -; GCN: v_lshrrev_b32_e32 v[[A_I16_1:[0-9]+]], 16, v[[A_V2_I16]] -; SI: v_cvt_f32_u32_e32 v[[A_F32_1:[0-9]+]], v[[A_I16_1]] -; SI: v_cvt_f32_u32_e32 v[[A_F32_0:[0-9]+]], v[[A_I16_0]] -; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]] -; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]] -; VI: v_cvt_f16_u16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_I16]] -; VI: v_cvt_f16_u16_e32 v[[R_F16_1:[0-9]+]], v[[A_I16_1]] -; VI: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] -; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] -; SI: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], s[[MASK]], v[[R_F16_0]] -; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] -; GCN: buffer_store_dword v[[R_V2_F16]] -; GCN: s_endpgm +; GCN: buffer_load_dword +; SI: v_cvt_f32_u32_e32 +; SI: v_cvt_f32_u32_e32 +; VI: v_cvt_f32_i32_e32 +; VI: v_cvt_f32_i32_e32 +; GCN: v_cvt_f16_f32_e32 +; GCN: v_cvt_f16_f32_e32 +; GCN-DAG: v_and_b32_e32 +; GCN-DAG: v_lshlrev_b32_e32 +; GCN-DAG: v_or_b32_e32 +; GCN: buffer_store_dword +; GCN: s_endpgm define void @uitofp_v2i16_to_v2f16( <2 x half> addrspace(1)* %r, <2 x i16> addrspace(1)* %a) { -- 2.7.4