From 02f435db0b5f91986a7408ff8a1f1fda4918bde0 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 24 Feb 2021 10:18:28 +0000 Subject: [PATCH] [RISCV] Support fixed-length vector i2fp/fp2i conversions This patch extends the support for scalable-vector int->fp and fp->int conversions by additionally handling fixed-length vectors. The existing scalable-vector lowering re-expresses widening/narrowing by x4+ conversions as standard nodes. The fixed-length vector support slots in at "the end" of this process by lowering the now equally-sized and widening/narrowing by x2 nodes to our custom VL versions. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D97374 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 59 ++- llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 + llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 98 +++++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll | 445 +++++++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll | 391 ++++++++++++++++++ 5 files changed, 994 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 69610218..ef4d1ea 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -571,6 +571,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::MULHS, VT, Custom); setOperationAction(ISD::MULHU, VT, Custom); + setOperationAction(ISD::SINT_TO_FP, VT, Custom); + setOperationAction(ISD::UINT_TO_FP, VT, Custom); + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); @@ -1370,11 +1375,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, SDLoc DL(Op); SDValue Src = Op.getOperand(0); MVT EltVT = VT.getVectorElementType(); - MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType(); + MVT SrcVT = Src.getSimpleValueType(); + MVT SrcEltVT = SrcVT.getVectorElementType(); unsigned EltSize = EltVT.getSizeInBits(); unsigned SrcEltSize = SrcEltVT.getSizeInBits(); assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && "Unexpected vector element types"); + bool IsInt2FP = SrcEltVT.isInteger(); // Widening conversions if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) { @@ -1416,7 +1423,49 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); } - return Op; + // Scalable vectors can exit here. Patterns will handle equally-sized + // conversions halving/doubling ones. + if (!VT.isFixedLengthVector()) + return Op; + + // For fixed-length vectors we lower to a custom "VL" node. + unsigned RVVOpc = 0; + switch (Op.getOpcode()) { + default: + llvm_unreachable("Impossible opcode"); + case ISD::FP_TO_SINT: + RVVOpc = RISCVISD::FP_TO_SINT_VL; + break; + case ISD::FP_TO_UINT: + RVVOpc = RISCVISD::FP_TO_UINT_VL; + break; + case ISD::SINT_TO_FP: + RVVOpc = RISCVISD::SINT_TO_FP_VL; + break; + case ISD::UINT_TO_FP: + RVVOpc = RISCVISD::UINT_TO_FP_VL; + break; + } + + MVT ContainerVT, SrcContainerVT; + // Derive the reference container type from the larger vector type. 
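+  // For instance, an f64 -> i32 narrowing conversion derives its container
+  // from the wider f64 source type, while an i32 -> f64 widening conversion
+  // derives it from the f64 result type; the other container then reuses
+  // that element count with the narrower element type.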
+ if (SrcEltSize > EltSize) { + SrcContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector( + DAG, SrcVT, Subtarget); + ContainerVT = + SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); + } else { + ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector( + DAG, VT, Subtarget); + SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT); + } + + SDValue Mask, VL; + std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + + Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); + Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); + return convertFromScalableVector(VT, Src, DAG, Subtarget); } case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_UMAX: @@ -5573,8 +5622,12 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(UMAX_VL) NODE_NAME_CASE(MULHS_VL) NODE_NAME_CASE(MULHU_VL) - NODE_NAME_CASE(FP_ROUND_VL) + NODE_NAME_CASE(FP_TO_SINT_VL) + NODE_NAME_CASE(FP_TO_UINT_VL) + NODE_NAME_CASE(SINT_TO_FP_VL) + NODE_NAME_CASE(UINT_TO_FP_VL) NODE_NAME_CASE(FP_EXTEND_VL) + NODE_NAME_CASE(FP_ROUND_VL) NODE_NAME_CASE(SETCC_VL) NODE_NAME_CASE(VSELECT_VL) NODE_NAME_CASE(VMAND_VL) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 606d171..f2d8000 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -176,6 +176,10 @@ enum NodeType : unsigned { UMAX_VL, MULHS_VL, MULHU_VL, + FP_TO_SINT_VL, + FP_TO_UINT_VL, + SINT_TO_FP_VL, + UINT_TO_FP_VL, FP_ROUND_VL, FP_EXTEND_VL, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index e1543ca..73c4197 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -109,6 +109,19 @@ def riscv_fpround_vl : SDNode<"RISCVISD::FP_ROUND_VL", SDT_RISCVFPRoundOp_VL>; def riscv_fpextend_vl : SDNode<"RISCVISD::FP_EXTEND_VL", SDT_RISCVFPExtendOp_VL>; def riscv_fncvt_rod_vl : SDNode<"RISCVISD::VFNCVT_ROD_VL", SDT_RISCVFPRoundOp_VL>; +def SDT_RISCVFP2IOp_VL : SDTypeProfile<1, 3, [ + SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>, + SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT> +]>; +def SDT_RISCVI2FPOp_VL : SDTypeProfile<1, 3, [ + SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>, + SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT> +]>; + +def riscv_fp_to_sint_vl : SDNode<"RISCVISD::FP_TO_SINT_VL", SDT_RISCVFP2IOp_VL>; +def riscv_fp_to_uint_vl : SDNode<"RISCVISD::FP_TO_UINT_VL", SDT_RISCVFP2IOp_VL>; +def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>; +def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>; def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL", SDTypeProfile<1, 5, [SDTCVecEltisVT<0, i1>, @@ -393,6 +406,76 @@ multiclass VPatExtendSDNode_V_VL { + foreach fvti = AllFloatVectors in { + defvar ivti = GetIntVTypeInfo.Vti; + def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), + (fvti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(instruction_name#"_"#ivti.LMul.MX) + fvti.RegClass:$rs1, GPR:$vl, ivti.SEW)>; + } +} + +multiclass VPatConvertI2FPSDNode_V_VL { + foreach fvti = AllFloatVectors in { + defvar ivti = GetIntVTypeInfo.Vti; + def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), + (ivti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(instruction_name#"_"#fvti.LMul.MX) + 
ivti.RegClass:$rs1, GPR:$vl, fvti.SEW)>; + } +} + +multiclass VPatWConvertFP2ISDNode_V_VL { + foreach fvtiToFWti = AllWidenableFloatVectors in { + defvar fvti = fvtiToFWti.Vti; + defvar iwti = GetIntVTypeInfo.Vti; + def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), + (fvti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(instruction_name#"_"#fvti.LMul.MX) + fvti.RegClass:$rs1, GPR:$vl, fvti.SEW)>; + } +} + +multiclass VPatWConvertI2FPSDNode_V_VL { + foreach vtiToWti = AllWidenableIntToFloatVectors in { + defvar ivti = vtiToWti.Vti; + defvar fwti = vtiToWti.Wti; + def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), + (ivti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(instruction_name#"_"#ivti.LMul.MX) + ivti.RegClass:$rs1, GPR:$vl, ivti.SEW)>; + } +} + +multiclass VPatNConvertFP2ISDNode_V_VL { + foreach vtiToWti = AllWidenableIntToFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar fwti = vtiToWti.Wti; + def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1), + (fwti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(instruction_name#"_"#vti.LMul.MX) + fwti.RegClass:$rs1, GPR:$vl, vti.SEW)>; + } +} + +multiclass VPatNConvertI2FPSDNode_V_VL { + foreach fvtiToFWti = AllWidenableFloatVectors in { + defvar fvti = fvtiToFWti.Vti; + defvar iwti = GetIntVTypeInfo.Vti; + def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1), + (iwti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(instruction_name#"_"#fvti.LMul.MX) + iwti.RegClass:$rs1, GPR:$vl, fvti.SEW)>; + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -755,7 +838,17 @@ foreach fvti = AllFloatVectors in { (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl, fvti.SEW)>; + // 14.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions + defm "" : VPatConvertFP2ISDNode_V_VL; + defm "" : VPatConvertFP2ISDNode_V_VL; + defm "" : VPatConvertI2FPSDNode_V_VL; + defm "" : VPatConvertI2FPSDNode_V_VL; + // 14.18. 
Widening Floating-Point/Integer Type-Convert Instructions + defm "" : VPatWConvertFP2ISDNode_V_VL; + defm "" : VPatWConvertFP2ISDNode_V_VL; + defm "" : VPatWConvertI2FPSDNode_V_VL; + defm "" : VPatWConvertI2FPSDNode_V_VL; foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; @@ -766,6 +859,11 @@ foreach fvti = AllFloatVectors in { fvti.RegClass:$rs1, GPR:$vl, fvti.SEW)>; } + // 14.19 Narrowing Floating-Point/Integer Type-Convert Instructions + defm "" : VPatNConvertFP2ISDNode_V_VL; + defm "" : VPatNConvertFP2ISDNode_V_VL; + defm "" : VPatNConvertI2FPSDNode_V_VL; + defm "" : VPatNConvertI2FPSDNode_V_VL; foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll new file mode 100644 index 0000000..5889e0d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -0,0 +1,445 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 + +define void @fp2si_v2f32_v2i32(<2 x float>* %x, <2 x i32>* %y) { +; CHECK-LABEL: fp2si_v2f32_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfcvt.rtz.x.f.v v25, v25 +; CHECK-NEXT: vse32.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x float>, <2 x float>* %x + %d = fptosi <2 x float> %a to <2 x i32> + store <2 x i32> %d, <2 x i32>* %y + ret void +} + +define void @fp2ui_v2f32_v2i32(<2 x float>* %x, <2 x i32>* %y) { +; CHECK-LABEL: fp2ui_v2f32_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfcvt.rtz.xu.f.v v25, v25 +; CHECK-NEXT: vse32.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x float>, <2 x float>* %x + %d = fptoui <2 x float> %a to <2 x i32> + store <2 x i32> %d, <2 x i32>* %y + ret void +} + +define void @fp2si_v8f32_v8i32(<8 x float>* %x, <8 x i32>* %y) { +; LMULMAX8-LABEL: fp2si_v8f32_v8i32: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vle32.v v26, (a0) +; LMULMAX8-NEXT: vfcvt.rtz.x.f.v v26, v26 +; LMULMAX8-NEXT: vse32.v v26, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: fp2si_v8f32_v8i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: addi a2, a0, 16 +; LMULMAX1-NEXT: vle32.v v25, (a2) +; LMULMAX1-NEXT: vle32.v v26, (a0) +; LMULMAX1-NEXT: vfcvt.rtz.x.f.v 
v25, v25 +; LMULMAX1-NEXT: vfcvt.rtz.x.f.v v26, v26 +; LMULMAX1-NEXT: vse32.v v26, (a1) +; LMULMAX1-NEXT: addi a0, a1, 16 +; LMULMAX1-NEXT: vse32.v v25, (a0) +; LMULMAX1-NEXT: ret + %a = load <8 x float>, <8 x float>* %x + %d = fptosi <8 x float> %a to <8 x i32> + store <8 x i32> %d, <8 x i32>* %y + ret void +} + +define void @fp2ui_v8f32_v8i32(<8 x float>* %x, <8 x i32>* %y) { +; LMULMAX8-LABEL: fp2ui_v8f32_v8i32: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vle32.v v26, (a0) +; LMULMAX8-NEXT: vfcvt.rtz.xu.f.v v26, v26 +; LMULMAX8-NEXT: vse32.v v26, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: fp2ui_v8f32_v8i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: addi a2, a0, 16 +; LMULMAX1-NEXT: vle32.v v25, (a2) +; LMULMAX1-NEXT: vle32.v v26, (a0) +; LMULMAX1-NEXT: vfcvt.rtz.xu.f.v v25, v25 +; LMULMAX1-NEXT: vfcvt.rtz.xu.f.v v26, v26 +; LMULMAX1-NEXT: vse32.v v26, (a1) +; LMULMAX1-NEXT: addi a0, a1, 16 +; LMULMAX1-NEXT: vse32.v v25, (a0) +; LMULMAX1-NEXT: ret + %a = load <8 x float>, <8 x float>* %x + %d = fptoui <8 x float> %a to <8 x i32> + store <8 x i32> %d, <8 x i32>* %y + ret void +} + +define void @fp2si_v2f32_v2i64(<2 x float>* %x, <2 x i64>* %y) { +; CHECK-LABEL: fp2si_v2f32_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.rtz.x.f.v v26, v25 +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vse64.v v26, (a1) +; CHECK-NEXT: ret + %a = load <2 x float>, <2 x float>* %x + %d = fptosi <2 x float> %a to <2 x i64> + store <2 x i64> %d, <2 x i64>* %y + ret void +} + +define void @fp2ui_v2f32_v2i64(<2 x float>* %x, <2 x i64>* %y) { +; CHECK-LABEL: fp2ui_v2f32_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v26, v25 +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vse64.v v26, (a1) +; CHECK-NEXT: ret + %a = load <2 x float>, <2 x float>* %x + %d = fptoui <2 x float> %a to <2 x i64> + store <2 x i64> %d, <2 x i64>* %y + ret void +} + +define void @fp2si_v8f32_v8i64(<8 x float>* %x, <8 x i64>* %y) { +; LMULMAX8-LABEL: fp2si_v8f32_v8i64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vle32.v v26, (a0) +; LMULMAX8-NEXT: vfwcvt.rtz.x.f.v v28, v26 +; LMULMAX8-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX8-NEXT: vse64.v v28, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: fp2si_v8f32_v8i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: addi a2, a0, 16 +; LMULMAX1-NEXT: vle32.v v25, (a2) +; LMULMAX1-NEXT: vle32.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v27, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v28, v25 +; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v25, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v29, v26 +; LMULMAX1-NEXT: addi a0, a1, 16 +; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vse64.v v29, (a0) +; LMULMAX1-NEXT: vse64.v v25, (a1) +; LMULMAX1-NEXT: addi a0, a1, 48 +; LMULMAX1-NEXT: vse64.v v28, (a0) +; LMULMAX1-NEXT: addi a0, a1, 32 
+; LMULMAX1-NEXT: vse64.v v27, (a0) +; LMULMAX1-NEXT: ret + %a = load <8 x float>, <8 x float>* %x + %d = fptosi <8 x float> %a to <8 x i64> + store <8 x i64> %d, <8 x i64>* %y + ret void +} + +define void @fp2ui_v8f32_v8i64(<8 x float>* %x, <8 x i64>* %y) { +; LMULMAX8-LABEL: fp2ui_v8f32_v8i64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vle32.v v26, (a0) +; LMULMAX8-NEXT: vfwcvt.rtz.xu.f.v v28, v26 +; LMULMAX8-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX8-NEXT: vse64.v v28, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: fp2ui_v8f32_v8i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: addi a2, a0, 16 +; LMULMAX1-NEXT: vle32.v v25, (a2) +; LMULMAX1-NEXT: vle32.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v27, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v28, v25 +; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v25, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v29, v26 +; LMULMAX1-NEXT: addi a0, a1, 16 +; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vse64.v v29, (a0) +; LMULMAX1-NEXT: vse64.v v25, (a1) +; LMULMAX1-NEXT: addi a0, a1, 48 +; LMULMAX1-NEXT: vse64.v v28, (a0) +; LMULMAX1-NEXT: addi a0, a1, 32 +; LMULMAX1-NEXT: vse64.v v27, (a0) +; LMULMAX1-NEXT: ret + %a = load <8 x float>, <8 x float>* %x + %d = fptoui <8 x float> %a to <8 x i64> + store <8 x i64> %d, <8 x i64>* %y + ret void +} + +define void @fp2si_v2f16_v2i64(<2 x half>* %x, <2 x i64>* %y) { +; CHECK-LABEL: fp2si_v2f16_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.rtz.x.f.v v25, v26 +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vse64.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x half>, <2 x half>* %x + %d = fptosi <2 x half> %a to <2 x i64> + store <2 x i64> %d, <2 x i64>* %y + ret void +} + +define void @fp2ui_v2f16_v2i64(<2 x half>* %x, <2 x i64>* %y) { +; CHECK-LABEL: fp2ui_v2f16_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v25, v26 +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vse64.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x half>, <2 x half>* %x + %d = fptoui <2 x half> %a to <2 x i64> + store <2 x i64> %d, <2 x i64>* %y + ret void +} + +define void @fp2si_v2f64_v2i8(<2 x double>* %x, <2 x i8>* %y) { +; CHECK-LABEL: fp2si_v2f64_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v26, v25 +; CHECK-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vse8.v v26, (a1) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %d = fptosi <2 x double> %a to <2 x i8> + store <2 x i8> %d, <2 
x i8>* %y + ret void +} + +define void @fp2ui_v2f64_v2i8(<2 x double>* %x, <2 x i8>* %y) { +; CHECK-LABEL: fp2ui_v2f64_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.rtz.xu.f.w v26, v25 +; CHECK-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vse8.v v26, (a1) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %d = fptoui <2 x double> %a to <2 x i8> + store <2 x i8> %d, <2 x i8>* %y + ret void +} + +define void @fp2si_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) { +; LMULMAX8-LABEL: fp2si_v8f64_v8i8: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e64,m4,ta,mu +; LMULMAX8-NEXT: vle64.v v28, (a0) +; LMULMAX8-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vfncvt.rtz.x.f.w v26, v28 +; LMULMAX8-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX8-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX8-NEXT: vsetivli a0, 8, e8,mf2,ta,mu +; LMULMAX8-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX8-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX8-NEXT: vse8.v v26, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: fp2si_v8f64_v8i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi sp, sp, -16 +; LMULMAX1-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vle64.v v25, (a0) +; LMULMAX1-NEXT: addi a2, a0, 32 +; LMULMAX1-NEXT: vle64.v v26, (a2) +; LMULMAX1-NEXT: addi a2, a0, 48 +; LMULMAX1-NEXT: vle64.v v27, (a2) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle64.v v28, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v29, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v29, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v29, v27, 0 +; LMULMAX1-NEXT: addi a0, sp, 6 +; LMULMAX1-NEXT: vsetivli a2, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v29, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v27, v28 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v27, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v28, 0 +; LMULMAX1-NEXT: addi a0, sp, 2 +; LMULMAX1-NEXT: vsetivli a2, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v27, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v27, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v27, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 4 +; LMULMAX1-NEXT: vse8.v v27, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 4 +; LMULMAX1-NEXT: vle8.v v26, (a0) +; LMULMAX1-NEXT: addi a0, sp, 12 +; LMULMAX1-NEXT: vse8.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v26, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v26, (sp) +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (sp) +; LMULMAX1-NEXT: addi a0, sp, 8 +; LMULMAX1-NEXT: vse8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 8, 
e8,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 8 +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vse8.v v25, (a1) +; LMULMAX1-NEXT: addi sp, sp, 16 +; LMULMAX1-NEXT: ret + %a = load <8 x double>, <8 x double>* %x + %d = fptosi <8 x double> %a to <8 x i8> + store <8 x i8> %d, <8 x i8>* %y + ret void +} + +define void @fp2ui_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) { +; LMULMAX8-LABEL: fp2ui_v8f64_v8i8: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e64,m4,ta,mu +; LMULMAX8-NEXT: vle64.v v28, (a0) +; LMULMAX8-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vfncvt.rtz.xu.f.w v26, v28 +; LMULMAX8-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX8-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX8-NEXT: vsetivli a0, 8, e8,mf2,ta,mu +; LMULMAX8-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX8-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX8-NEXT: vse8.v v26, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: fp2ui_v8f64_v8i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi sp, sp, -16 +; LMULMAX1-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vle64.v v25, (a0) +; LMULMAX1-NEXT: addi a2, a0, 32 +; LMULMAX1-NEXT: vle64.v v26, (a2) +; LMULMAX1-NEXT: addi a2, a0, 48 +; LMULMAX1-NEXT: vle64.v v27, (a2) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle64.v v28, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v29, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v29, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v29, v27, 0 +; LMULMAX1-NEXT: addi a0, sp, 6 +; LMULMAX1-NEXT: vsetivli a2, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v29, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v27, v28 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v27, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v28, 0 +; LMULMAX1-NEXT: addi a0, sp, 2 +; LMULMAX1-NEXT: vsetivli a2, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v27, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v27, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v27, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 4 +; LMULMAX1-NEXT: vse8.v v27, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 4 +; LMULMAX1-NEXT: vle8.v v26, (a0) +; LMULMAX1-NEXT: addi a0, sp, 12 +; LMULMAX1-NEXT: vse8.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v26, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v26, (sp) +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (sp) +; LMULMAX1-NEXT: addi a0, sp, 8 +; LMULMAX1-NEXT: vse8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 8 +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vse8.v v25, (a1) +; LMULMAX1-NEXT: addi sp, sp, 16 +; LMULMAX1-NEXT: ret + %a = load <8 x double>, <8 x double>* %x + %d = fptoui <8 x double> %a to <8 x i8> + store <8 x i8> %d, <8 x i8>* %y + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll new file mode 100644 index 0000000..71ad2d9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -0,0 +1,391 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 + +define void @si2fp_v2i32_v2f32(<2 x i32>* %x, <2 x float>* %y) { +; CHECK-LABEL: si2fp_v2i32_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfcvt.f.x.v v25, v25 +; CHECK-NEXT: vse32.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x i32>, <2 x i32>* %x + %d = sitofp <2 x i32> %a to <2 x float> + store <2 x float> %d, <2 x float>* %y + ret void +} + +define void @ui2fp_v2i32_v2f32(<2 x i32>* %x, <2 x float>* %y) { +; CHECK-LABEL: ui2fp_v2i32_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfcvt.f.xu.v v25, v25 +; CHECK-NEXT: vse32.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x i32>, <2 x i32>* %x + %d = uitofp <2 x i32> %a to <2 x float> + store <2 x float> %d, <2 x float>* %y + ret void +} + +define void @si2fp_v8i32_v8f32(<8 x i32>* %x, <8 x float>* %y) { +; LMULMAX8-LABEL: si2fp_v8i32_v8f32: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vle32.v v26, (a0) +; LMULMAX8-NEXT: vfcvt.f.x.v v26, v26 +; LMULMAX8-NEXT: vse32.v v26, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: si2fp_v8i32_v8f32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: addi a2, a0, 16 +; LMULMAX1-NEXT: vle32.v v25, (a2) +; LMULMAX1-NEXT: vle32.v v26, (a0) +; LMULMAX1-NEXT: vfcvt.f.x.v v25, v25 +; LMULMAX1-NEXT: vfcvt.f.x.v v26, v26 +; LMULMAX1-NEXT: vse32.v v26, (a1) +; LMULMAX1-NEXT: addi a0, a1, 16 +; LMULMAX1-NEXT: vse32.v v25, (a0) +; LMULMAX1-NEXT: ret + %a = load <8 x i32>, <8 x i32>* %x + %d = sitofp <8 x i32> %a to <8 x float> + store <8 x float> %d, <8 x float>* %y + ret void +} + +define void @ui2fp_v8i32_v8f32(<8 x i32>* %x, <8 x float>* %y) { +; LMULMAX8-LABEL: ui2fp_v8i32_v8f32: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vle32.v v26, (a0) +; LMULMAX8-NEXT: vfcvt.f.xu.v v26, v26 +; LMULMAX8-NEXT: vse32.v v26, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: ui2fp_v8i32_v8f32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: addi a2, a0, 16 +; LMULMAX1-NEXT: vle32.v v25, (a2) +; LMULMAX1-NEXT: vle32.v v26, (a0) +; LMULMAX1-NEXT: 
vfcvt.f.xu.v v25, v25 +; LMULMAX1-NEXT: vfcvt.f.xu.v v26, v26 +; LMULMAX1-NEXT: vse32.v v26, (a1) +; LMULMAX1-NEXT: addi a0, a1, 16 +; LMULMAX1-NEXT: vse32.v v25, (a0) +; LMULMAX1-NEXT: ret + %a = load <8 x i32>, <8 x i32>* %x + %d = uitofp <8 x i32> %a to <8 x float> + store <8 x float> %d, <8 x float>* %y + ret void +} + +define void @si2fp_v2i16_v2f64(<2 x i16>* %x, <2 x double>* %y) { +; CHECK-LABEL: si2fp_v2i16_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vsext.vf4 v26, v25 +; CHECK-NEXT: vfcvt.f.x.v v25, v26 +; CHECK-NEXT: vse64.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x i16>, <2 x i16>* %x + %d = sitofp <2 x i16> %a to <2 x double> + store <2 x double> %d, <2 x double>* %y + ret void +} + +define void @ui2fp_v2i16_v2f64(<2 x i16>* %x, <2 x double>* %y) { +; CHECK-LABEL: ui2fp_v2i16_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vzext.vf4 v26, v25 +; CHECK-NEXT: vfcvt.f.xu.v v25, v26 +; CHECK-NEXT: vse64.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x i16>, <2 x i16>* %x + %d = uitofp <2 x i16> %a to <2 x double> + store <2 x double> %d, <2 x double>* %y + ret void +} + +define void @si2fp_v8i16_v8f64(<8 x i16>* %x, <8 x double>* %y) { +; LMULMAX8-LABEL: si2fp_v8i16_v8f64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e16,m1,ta,mu +; LMULMAX8-NEXT: vle16.v v25, (a0) +; LMULMAX8-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX8-NEXT: vsext.vf4 v28, v25 +; LMULMAX8-NEXT: vfcvt.f.x.v v28, v28 +; LMULMAX8-NEXT: vse64.v v28, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: si2fp_v8i16_v8f64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v27, v26 +; LMULMAX1-NEXT: vfcvt.f.x.v v26, v27 +; LMULMAX1-NEXT: vsext.vf4 v27, v25 +; LMULMAX1-NEXT: vfcvt.f.x.v v27, v27 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v28, v25 +; LMULMAX1-NEXT: vfcvt.f.x.v v28, v28 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v29, v25 +; LMULMAX1-NEXT: vfcvt.f.x.v v25, v29 +; LMULMAX1-NEXT: addi a0, a1, 48 +; LMULMAX1-NEXT: vse64.v v25, (a0) +; LMULMAX1-NEXT: addi a0, a1, 32 +; LMULMAX1-NEXT: vse64.v v28, (a0) +; LMULMAX1-NEXT: vse64.v v27, (a1) +; LMULMAX1-NEXT: addi a0, a1, 16 +; LMULMAX1-NEXT: vse64.v v26, (a0) +; LMULMAX1-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %d = sitofp <8 x i16> %a to <8 x double> + store <8 x double> %d, <8 x double>* %y + ret void +} + +define void @ui2fp_v8i16_v8f64(<8 x i16>* %x, <8 x double>* %y) { +; LMULMAX8-LABEL: ui2fp_v8i16_v8f64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e16,m1,ta,mu +; LMULMAX8-NEXT: vle16.v v25, (a0) +; LMULMAX8-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX8-NEXT: vzext.vf4 v28, v25 +; LMULMAX8-NEXT: vfcvt.f.xu.v v28, v28 +; LMULMAX8-NEXT: vse64.v v28, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: ui2fp_v8i16_v8f64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: 
vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v27, v26 +; LMULMAX1-NEXT: vfcvt.f.xu.v v26, v27 +; LMULMAX1-NEXT: vzext.vf4 v27, v25 +; LMULMAX1-NEXT: vfcvt.f.xu.v v27, v27 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v28, v25 +; LMULMAX1-NEXT: vfcvt.f.xu.v v28, v28 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v29, v25 +; LMULMAX1-NEXT: vfcvt.f.xu.v v25, v29 +; LMULMAX1-NEXT: addi a0, a1, 48 +; LMULMAX1-NEXT: vse64.v v25, (a0) +; LMULMAX1-NEXT: addi a0, a1, 32 +; LMULMAX1-NEXT: vse64.v v28, (a0) +; LMULMAX1-NEXT: vse64.v v27, (a1) +; LMULMAX1-NEXT: addi a0, a1, 16 +; LMULMAX1-NEXT: vse64.v v26, (a0) +; LMULMAX1-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %d = uitofp <8 x i16> %a to <8 x double> + store <8 x double> %d, <8 x double>* %y + ret void +} + +define void @si2fp_v2i64_v2f16(<2 x i64>* %x, <2 x half>* %y) { +; CHECK-LABEL: si2fp_v2i64_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.x.w v26, v25 +; CHECK-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v26 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vse16.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x i64>, <2 x i64>* %x + %d = sitofp <2 x i64> %a to <2 x half> + store <2 x half> %d, <2 x half>* %y + ret void +} + +define void @ui2fp_v2i64_v2f16(<2 x i64>* %x, <2 x half>* %y) { +; CHECK-LABEL: ui2fp_v2i64_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.xu.w v26, v25 +; CHECK-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v26 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vse16.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x i64>, <2 x i64>* %x + %d = uitofp <2 x i64> %a to <2 x half> + store <2 x half> %d, <2 x half>* %y + ret void +} + +define void @si2fp_v8i64_v8f16(<8 x i64>* %x, <8 x half>* %y) { +; LMULMAX8-LABEL: si2fp_v8i64_v8f16: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e64,m4,ta,mu +; LMULMAX8-NEXT: vle64.v v28, (a0) +; LMULMAX8-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vfncvt.f.x.w v26, v28 +; LMULMAX8-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX8-NEXT: vfncvt.f.f.w v25, v26 +; LMULMAX8-NEXT: vse16.v v25, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: si2fp_v8i64_v8f16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi sp, sp, -32 +; LMULMAX1-NEXT: .cfi_def_cfa_offset 32 +; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vle64.v v25, (a0) +; LMULMAX1-NEXT: addi a2, a0, 32 +; LMULMAX1-NEXT: vle64.v v26, (a2) +; LMULMAX1-NEXT: addi a2, a0, 48 +; LMULMAX1-NEXT: vle64.v v27, (a2) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle64.v v28, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.x.w v29, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.f.w v27, v29 +; LMULMAX1-NEXT: addi a0, sp, 12 +; LMULMAX1-NEXT: vsetivli a2, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v27, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.x.w v27, v28 +; 
LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.f.w v28, v27 +; LMULMAX1-NEXT: addi a0, sp, 4 +; LMULMAX1-NEXT: vsetivli a2, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v28, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.x.w v27, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.f.w v26, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 8 +; LMULMAX1-NEXT: vse16.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 8 +; LMULMAX1-NEXT: vle16.v v26, (a0) +; LMULMAX1-NEXT: addi a0, sp, 24 +; LMULMAX1-NEXT: vse16.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.x.w v26, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.f.w v25, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v25, (sp) +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (sp) +; LMULMAX1-NEXT: addi a0, sp, 16 +; LMULMAX1-NEXT: vse16.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 16 +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: vse16.v v25, (a1) +; LMULMAX1-NEXT: addi sp, sp, 32 +; LMULMAX1-NEXT: ret + %a = load <8 x i64>, <8 x i64>* %x + %d = sitofp <8 x i64> %a to <8 x half> + store <8 x half> %d, <8 x half>* %y + ret void +} + +define void @ui2fp_v8i64_v8f16(<8 x i64>* %x, <8 x half>* %y) { +; LMULMAX8-LABEL: ui2fp_v8i64_v8f16: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a2, 8, e64,m4,ta,mu +; LMULMAX8-NEXT: vle64.v v28, (a0) +; LMULMAX8-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; LMULMAX8-NEXT: vfncvt.f.xu.w v26, v28 +; LMULMAX8-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX8-NEXT: vfncvt.f.f.w v25, v26 +; LMULMAX8-NEXT: vse16.v v25, (a1) +; LMULMAX8-NEXT: ret +; +; LMULMAX1-LABEL: ui2fp_v8i64_v8f16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi sp, sp, -32 +; LMULMAX1-NEXT: .cfi_def_cfa_offset 32 +; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vle64.v v25, (a0) +; LMULMAX1-NEXT: addi a2, a0, 32 +; LMULMAX1-NEXT: vle64.v v26, (a2) +; LMULMAX1-NEXT: addi a2, a0, 48 +; LMULMAX1-NEXT: vle64.v v27, (a2) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle64.v v28, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.xu.w v29, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.f.w v27, v29 +; LMULMAX1-NEXT: addi a0, sp, 12 +; LMULMAX1-NEXT: vsetivli a2, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v27, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.xu.w v27, v28 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.f.w v28, v27 +; LMULMAX1-NEXT: addi a0, sp, 4 +; LMULMAX1-NEXT: vsetivli a2, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v28, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.xu.w v27, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.f.w v26, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 8 +; LMULMAX1-NEXT: vse16.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 8 +; LMULMAX1-NEXT: vle16.v v26, (a0) +; LMULMAX1-NEXT: addi a0, sp, 24 +; LMULMAX1-NEXT: vse16.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.xu.w v26, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; LMULMAX1-NEXT: vfncvt.f.f.w v25, v26 +; 
LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v25, (sp) +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (sp) +; LMULMAX1-NEXT: addi a0, sp, 16 +; LMULMAX1-NEXT: vse16.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: addi a0, sp, 16 +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: vse16.v v25, (a1) +; LMULMAX1-NEXT: addi sp, sp, 32 +; LMULMAX1-NEXT: ret + %a = load <8 x i64>, <8 x i64>* %x + %d = uitofp <8 x i64> %a to <8 x half> + store <8 x half> %d, <8 x half>* %y + ret void +} -- 2.7.4
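As a brief illustration of the lowering this patch enables (a sketch recapping the tests above, not part of the change itself): an equally-sized fixed-length conversion such as

  %a = load <2 x float>, <2 x float>* %x
  %d = fptosi <2 x float> %a to <2 x i32>

is custom-lowered to a RISCVISD::FP_TO_SINT_VL node on the corresponding container type and selected to a single vfcvt.rtz.x.f.v under the computed VL (see fp2si_v2f32_v2i32), while the x2 widening and narrowing cases select the corresponding vfwcvt/vfncvt patterns added above.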