From: wanglian Date: Thu, 28 Jul 2022 07:40:06 +0000 (+0800) Subject: [LegalizeTypes][VP] Add split operand support for VP float and integer casting X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b6b0690355ccf2bb17750e7766e6b8ff758630e7;p=platform%2Fupstream%2Fllvm.git [LegalizeTypes][VP] Add split operand support for VP float and integer casting Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D130685 --- diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7cafa7d..9aa9aff 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2725,6 +2725,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::VP_SINT_TO_FP: + case ISD::VP_UINT_TO_FP: if (N->getValueType(0).bitsLT( N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType())) Res = SplitVecOp_TruncateHelper(N); @@ -2737,6 +2739,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::VP_FP_TO_SINT: + case ISD::VP_FP_TO_UINT: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_EXTEND: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll index 1cff5b3..7205483 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll @@ -309,6 +309,50 @@ define @vfptosi_nxv2i64_nxv2f64_unmasked( %v } +declare @llvm.vp.fptosi.nxv32i16.nxv32f32(, , i32) + +define @vfptosi_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfptosi_nxv32i16_nxv32f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-NEXT: bltu a0, a3, .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB25_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call @llvm.vp.fptosi.nxv32i16.nxv32f32( %va, %m, i32 %evl) + ret %v +} + declare @llvm.vp.fptosi.nxv32i32.nxv32f32(, , i32) define @vfptosi_nxv32i32_nxv32f32( %va, %m, i32 zeroext %evl) { @@ -322,16 +366,16 @@ define @vfptosi_nxv32i32_nxv32f32( %va, ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB25_2 +; CHECK-NEXT: bltu a0, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 +; CHECK-NEXT: bltu a0, a1, .LBB26_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB26_4: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t @@ -346,18 +390,18 @@ define @vfptosi_nxv32i32_nxv32f32_unmasked( @vfptoui_nxv2i64_nxv2f64_unmasked( %v } +declare @llvm.vp.fptoui.nxv32i16.nxv32f32(, , i32) + +define @vfptoui_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfptoui_nxv32i16_nxv32f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-NEXT: bltu a0, a3, .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB25_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call @llvm.vp.fptoui.nxv32i16.nxv32f32( %va, %m, i32 %evl) + ret %v +} + declare @llvm.vp.fptoui.nxv32i32.nxv32f32(, , i32) define @vfptoui_nxv32i32_nxv32f32( %va, %m, i32 zeroext %evl) { @@ -322,16 +366,16 @@ define @vfptoui_nxv32i32_nxv32f32( %va, ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB25_2 +; CHECK-NEXT: bltu a0, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 +; CHECK-NEXT: bltu a0, a1, .LBB26_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB26_4: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t @@ -346,18 +390,18 @@ define @vfptoui_nxv32i32_nxv32f32_unmasked( @vsitofp_nxv2f64_nxv2i64_unmasked( %v } +declare @llvm.vp.sitofp.nxv32f16.nxv32i32(, , i32) + +define @vsitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsitofp_nxv32f16_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-NEXT: bltu a0, a3, .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu +; CHECK-NEXT: vfncvt.f.x.w v12, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB25_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.f.x.w v8, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call @llvm.vp.sitofp.nxv32f16.nxv32i32( %va, %m, i32 %evl) + ret %v +} + declare @llvm.vp.sitofp.nxv32f32.nxv32i32(, , i32) define @vsitofp_nxv32f32_nxv32i32( %va, %m, i32 zeroext %evl) { @@ -314,16 +358,16 @@ define @vsitofp_nxv32f32_nxv32i32( %va, ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB25_2 +; CHECK-NEXT: bltu a0, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 +; CHECK-NEXT: bltu a0, a1, .LBB26_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB26_4: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -338,18 +382,18 @@ define @vsitofp_nxv32f32_nxv32i32_unmasked( @vuitofp_nxv2f64_nxv2i64_unmasked( %v } +declare @llvm.vp.uitofp.nxv32f16.nxv32i32(, , i32) + +define @vuitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vuitofp_nxv32f16_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-NEXT: bltu a0, a3, .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu +; CHECK-NEXT: vfncvt.f.xu.w v12, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB25_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.f.xu.w v8, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call @llvm.vp.uitofp.nxv32f16.nxv32i32( %va, %m, i32 %evl) + ret %v +} + declare @llvm.vp.uitofp.nxv32f32.nxv32i32(, , i32) define @vuitofp_nxv32f32_nxv32i32( %va, %m, i32 zeroext %evl) { @@ -314,16 +358,16 @@ define @vuitofp_nxv32f32_nxv32i32( %va, ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB25_2 +; CHECK-NEXT: bltu a0, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 +; CHECK-NEXT: bltu a0, a1, .LBB26_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB26_4: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t @@ -338,18 +382,18 @@ define @vuitofp_nxv32f32_nxv32i32_unmasked(