From: Carl Ritson Date: Fri, 8 Mar 2019 09:03:11 +0000 (+0000) Subject: [AMDGPU] V_CVT_F32_UBYTE{0,1,2,3} are full rate instructions X-Git-Tag: llvmorg-10-init~10445 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1a98dc184044768dd672c54b3b9557a05dd082f1;p=platform%2Fupstream%2Fllvm.git [AMDGPU] V_CVT_F32_UBYTE{0,1,2,3} are full rate instructions Summary: Fix a bug in the scheduling model where V_CVT_F32_UBYTE{0,1,2,3} are incorrectly marked as quarter rate instructions. Reviewers: arsenm, rampitec Reviewed By: rampitec Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59091 llvm-svn: 355671 --- diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index aafe033..85077be 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -185,13 +185,14 @@ defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32 defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>; defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; +defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; +defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; +} // End SchedRW = [WriteQuarterRate32] + defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>; defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>; defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>; defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>; -defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; -defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; -} // End SchedRW = [WriteQuarterRate32] defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>; defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>; diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 4c41565..6ffc9e2 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -116,8 +116,8 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1) ; VI-DAG: v_add_u16_e32 ; VI-DAG: v_add_u16_e32 -; GCN: {{buffer|flat}}_store_dwordx4 -; GCN: {{buffer|flat}}_store_dword +; GCN-DAG: {{buffer|flat}}_store_dwordx4 +; GCN-DAG: {{buffer|flat}}_store_dword ; GCN: s_endpgm define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind { diff --git a/llvm/test/CodeGen/AMDGPU/udivrem24.ll b/llvm/test/CodeGen/AMDGPU/udivrem24.ll index 2c38f71..6e37578 100644 --- a/llvm/test/CodeGen/AMDGPU/udivrem24.ll +++ b/llvm/test/CodeGen/AMDGPU/udivrem24.ll @@ -4,8 +4,8 @@ ; FUNC-LABEL: {{^}}udiv24_i8: ; SI: v_cvt_f32_ubyte -; SI: v_cvt_f32_ubyte -; SI: v_rcp_iflag_f32 +; SI-DAG: v_cvt_f32_ubyte +; SI-DAG: v_rcp_iflag_f32 ; SI: v_cvt_u32_f32 ; EG: UINT_TO_FLT @@ -176,8 +176,8 @@ define amdgpu_kernel void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addr ; FUNC-LABEL: {{^}}urem24_i8: ; SI: v_cvt_f32_ubyte -; SI: v_cvt_f32_ubyte -; SI: v_rcp_iflag_f32 +; SI-DAG: v_cvt_f32_ubyte +; SI-DAG: v_rcp_iflag_f32 ; SI: v_cvt_u32_f32 ; EG: UINT_TO_FLT