From d157e3f387c918a6736fb29fccd78a80425e5f88 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 1 Apr 2021 10:17:53 -0700
Subject: [PATCH] [RISCV] Fix handling of nxvXi64 vmsgt(u).vx intrinsics on RV32.

We need to splat the scalar separately and use .vv, but there is no
vmsgt(u).vv. So add isel patterns to select vmslt(u).vv with swapped
operands.

We also need to get VT to use for the splat from an operand rather
than the result since the result VT is nxvXi1.

Reviewed By: HsiangKai

Differential Revision: https://reviews.llvm.org/D99704
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp     |  10 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td |  49 +++-
 llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll       | 293 +++++++++++++++++++++++-
 llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll      | 293 +++++++++++++++++++++++-
 llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll      |   2 +-
 6 files changed, 643 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6cafa27..08df519 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2720,7 +2720,6 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
   SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
   SDValue &ScalarOp = Operands[SplatOp];
   MVT OpVT = ScalarOp.getSimpleValueType();
-  MVT VT = Op.getSimpleValueType();
   MVT XLenVT = Subtarget.getXLenVT();
 
   // If this isn't a scalar, or its type is XLenVT we're done.
@@ -2739,6 +2738,15 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
     return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
   }
 
+  // Use the previous operand to get the vXi64 VT. The result might be a mask
+  // VT for compares. Using the previous operand assumes that the previous
+  // operand will never have a smaller element size than a scalar operand and
+  // that a widening operation never uses SEW=64.
+  // NOTE: If this fails the below assert, we can probably just find the
+  // element count from any operand or result and use it to construct the VT.
+  assert(II->SplatOperand > 1 && "Unexpected splat operand!");
+  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
+
   // The more complex case is when the scalar is larger than XLenVT.
   assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
          VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index c73b460..7ac2509 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3891,13 +3891,60 @@ defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsle", "PseudoVMSLE", AllIntegerVectors>;
 defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgtu", "PseudoVMSGTU", AllIntegerVectors>;
 defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgt", "PseudoVMSGT", AllIntegerVectors>;
 
+// Match vmsgt with 2 vector operands to vmslt with the operands swapped.
+// Occurs when legalizing vmsgt(u).vx intrinsics for i64 on RV32 since we need
+// to use a more complex splat sequence. Add the pattern for all VTs for
+// consistency.
+foreach vti = AllIntegerVectors in {
+  def : Pat<(vti.Mask (int_riscv_vmsgt (vti.Vector vti.RegClass:$rs2),
+                                       (vti.Vector vti.RegClass:$rs1),
+                                       (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMSLT_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
+                                                               vti.RegClass:$rs2,
+                                                               GPR:$vl,
+                                                               vti.SEW)>;
+  def : Pat<(vti.Mask (int_riscv_vmsgt_mask (vti.Mask VR:$merge),
+                                            (vti.Vector vti.RegClass:$rs2),
+                                            (vti.Vector vti.RegClass:$rs1),
+                                            (vti.Mask V0),
+                                            (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMSLT_VV_"#vti.LMul.MX#"_MASK")
+                                                              VR:$merge,
+                                                              vti.RegClass:$rs1,
+                                                              vti.RegClass:$rs2,
+                                                              (vti.Mask V0),
+                                                              GPR:$vl,
+                                                              vti.SEW)>;
+
+  def : Pat<(vti.Mask (int_riscv_vmsgtu (vti.Vector vti.RegClass:$rs2),
+                                        (vti.Vector vti.RegClass:$rs1),
+                                        (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMSLTU_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
+                                                                vti.RegClass:$rs2,
+                                                                GPR:$vl,
+                                                                vti.SEW)>;
+  def : Pat<(vti.Mask (int_riscv_vmsgtu_mask (vti.Mask VR:$merge),
+                                             (vti.Vector vti.RegClass:$rs2),
+                                             (vti.Vector vti.RegClass:$rs1),
+                                             (vti.Mask V0),
+                                             (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMSLTU_VV_"#vti.LMul.MX#"_MASK")
+                                                              VR:$merge,
+                                                              vti.RegClass:$rs1,
+                                                              vti.RegClass:$rs2,
+                                                              (vti.Mask V0),
+                                                              GPR:$vl,
+                                                              vti.SEW)>;
+}
+
 // Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16. This
 // avoids the user needing to know that there is no vmslt(u).vi instruction.
 // This is limited to vmslt(u).vx as there is no vmsge().vx intrinsic or
 // instruction.
 foreach vti = AllIntegerVectors in {
   def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1),
-                                       (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))),
+                                       (vti.Scalar simm5_plus1:$rs2),
+                                       (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
                                                                (DecImm simm5_plus1:$rs2),
                                                                GPR:$vl,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll
index f1942ab..a6a0588 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
 ; RUN:   --riscv-no-aliases < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i8.i8(
   <vscale x 1 x i8>,
@@ -706,6 +706,192 @@ entry:
   ret %a
 }
 
+declare <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i64.i64(
+  <vscale x 1 x i64>,
+  i64,
+  i32);
+
+define <vscale x 1 x i1> @intrinsic_vmsgt_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vsll.vx v26, v26, a1
+; CHECK-NEXT:    vsrl.vx v26, v26, a1
+; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    vmslt.vv v0, v25, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i64.i64(
+    <vscale x 1 x i64> %0,
+    i64 %1,
+    i32 %2)
+
+  ret %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i64.i64(
+  <vscale x 1 x i1>,
+  <vscale x 1 x i64>,
+  i64,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v25, v0
+; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v26, a1
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsll.vx v26, v26, a1
+; CHECK-NEXT:    vmv.v.x v27, a0
+; CHECK-NEXT:    vsll.vx v27, v27, a1
+; CHECK-NEXT:    vsrl.vx v27, v27, a1
+; CHECK-NEXT:    vor.vv v26, v27, v26
+;
CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmslt.vv v25, v26, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv2i64.i64( + , + i64, + i32); + +define @intrinsic_vmsgt_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vsll.vx v28, v28, a1 +; CHECK-NEXT: vsrl.vx v28, v28, a1 +; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: vmslt.vv v0, v26, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.nxv2i64.i64( + %0, + i64 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv2i64.i64( + , + , + i64, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vsll.vx v28, v28, a1 +; CHECK-NEXT: vsrl.vx v28, v28, a1 +; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vv v25, v26, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgt.nxv4i64.i64( + , + i64, + i32); + +define @intrinsic_vmsgt_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v28, v28, a1 +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vsll.vx v12, v12, a1 +; CHECK-NEXT: vsrl.vx v12, v12, a1 +; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: vmslt.vv v0, v28, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.nxv4i64.i64( + %0, + i64 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgt.mask.nxv4i64.i64( + , + , + i64, + , + i32); + +define @intrinsic_vmsgt_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v28, v28, a1 +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsll.vx v16, v16, a1 +; CHECK-NEXT: vsrl.vx v16, v16, a1 +; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmslt.vv v25, v28, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4) + + ret %a +} + define @intrinsic_vmsgt_vi_nxv1i8_i8( %0, i32 %1) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry @@ -1230,3 +1416,108 @@ entry: ret %a } + +define @intrinsic_vmsgt_vi_nxv1i64_i64( %0, i32 %1) nounwind { +; CHECK-LABEL: 
intrinsic_vmsgt_vi_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vmsgt.vi v0, v8, 9 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.nxv1i64.i64( + %0, + i64 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv1i64_i64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsgt.vi v25, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv2i64_i64( %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vmsgt.vi v0, v8, 9 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.nxv2i64.i64( + %0, + i64 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv2i64_i64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vi v25, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgt_vi_nxv4i64_i64( %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vmsgt_vi_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vmsgt.vi v0, v8, 9 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.nxv4i64.i64( + %0, + i64 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgt_mask_vi_nxv4i64_i64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v25, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgt.mask.nxv4i64.i64( + %0, + %1, + i64 9, + %2, + i32 %3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll index 9dc6bc0c..fcbb58f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ ; RUN: --riscv-no-aliases < %s | FileCheck %s declare @llvm.riscv.vmsgt.nxv1i8.i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll index 3119ebd..35b01e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ ; RUN: --riscv-no-aliases < %s | FileCheck %s declare @llvm.riscv.vmsgtu.nxv1i8.i8( , @@ -706,6 +706,192 @@ entry: ret %a } +declare 
@llvm.riscv.vmsgtu.nxv1i64.i64( + , + i64, + i32); + +define @intrinsic_vmsgtu_vx_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v25, v25, a1 +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: vsrl.vx v26, v26, a1 +; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: vmsltu.vv v0, v25, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.nxv1i64.i64( + %0, + i64 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv1i64.i64( + , + , + i64, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: vmv.v.x v27, a0 +; CHECK-NEXT: vsll.vx v27, v27, a1 +; CHECK-NEXT: vsrl.vx v27, v27, a1 +; CHECK-NEXT: vor.vv v26, v27, v26 +; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsltu.vv v25, v26, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv2i64.i64( + , + i64, + i32); + +define @intrinsic_vmsgtu_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vsll.vx v28, v28, a1 +; CHECK-NEXT: vsrl.vx v28, v28, a1 +; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: vmsltu.vv v0, v26, v8 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.nxv2i64.i64( + %0, + i64 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv2i64.i64( + , + , + i64, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vsll.vx v28, v28, a1 +; CHECK-NEXT: vsrl.vx v28, v28, a1 +; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsltu.vv v25, v26, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmsgtu.nxv4i64.i64( + , + i64, + i32); + +define @intrinsic_vmsgtu_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v28, v28, a1 +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vsll.vx v12, v12, a1 +; CHECK-NEXT: vsrl.vx v12, v12, a1 +; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: vmsltu.vv v0, v28, v8 +; CHECK-NEXT: 
jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.nxv4i64.i64( + %0, + i64 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmsgtu.mask.nxv4i64.i64( + , + , + i64, + , + i32); + +define @intrinsic_vmsgtu_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v28, v28, a1 +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsll.vx v16, v16, a1 +; CHECK-NEXT: vsrl.vx v16, v16, a1 +; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsltu.vv v25, v28, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4) + + ret %a +} + define @intrinsic_vmsgtu_vi_nxv1i8_i8( %0, i32 %1) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry @@ -1230,3 +1416,108 @@ entry: ret %a } + +define @intrinsic_vmsgtu_vi_nxv1i64_i64( %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vmsgtu.vi v0, v8, 9 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.nxv1i64.i64( + %0, + i64 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv1i64_i64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsgtu.vi v25, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.mask.nxv1i64.i64( + %0, + %1, + i64 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv2i64_i64( %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vmsgtu.vi v0, v8, 9 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.nxv2i64.i64( + %0, + i64 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv2i64_i64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vi v25, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.mask.nxv2i64.i64( + %0, + %1, + i64 9, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vmsgtu_vi_nxv4i64_i64( %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vmsgtu.vi v0, v8, 9 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call @llvm.riscv.vmsgtu.nxv4i64.i64( + %0, + i64 9, + i32 %1) + + ret %a +} + +define @intrinsic_vmsgtu_mask_vi_nxv4i64_i64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v25, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call 
<vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64(
+    <vscale x 4 x i1> %0,
+    <vscale x 4 x i64> %1,
+    i64 9,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll
index 231d578..ecb29a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
 ; RUN:   --riscv-no-aliases < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i8.i8(
   <vscale x 1 x i8>,
-- 
2.7.4
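
Note on the lowering this patch exercises: on RV32 an i64 scalar operand arrives in two GPRs, so vmsgt(u).vx cannot keep the scalar in a single register operand. lowerVectorIntrinsicSplats instead builds a vXi64 splat in vector registers (splat each half, shift the high half left by 32, zero-extend the low half, OR them together) and emits the compare as a .vv operation; since there is no vmsgt(u).vv encoding, the new TableGen patterns above select vmslt(u).vv with the operands swapped. The following is a minimal standalone LLVM IR sketch, assuming the intrinsic signature and the -mattr=+experimental-v spelling used by the tests in this patch; the function name is just for illustration, and the expected RV32 assembly in the comments mirrors the autogenerated CHECK lines above (exact register choices may differ in other contexts).

; Build with: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs
declare <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i64.i64(
  <vscale x 1 x i64>,
  i64,
  i32);

define <vscale x 1 x i1> @vmsgt_vx_i64_on_rv32(<vscale x 1 x i64> %vec, i64 %scalar, i32 %vl) nounwind {
entry:
  ; Expected codegen (low half in a0, high half in a1, vl in a2, %vec in v8):
  ;   vsetvli  a2, a2, e64,m1,ta,mu
  ;   vmv.v.x  v25, a1         ; splat the high half
  ;   addi     a1, zero, 32
  ;   vsll.vx  v25, v25, a1    ; hi << 32
  ;   vmv.v.x  v26, a0         ; splat the low half
  ;   vsll.vx  v26, v26, a1
  ;   vsrl.vx  v26, v26, a1    ; zero-extend the low half
  ;   vor.vv   v25, v26, v25   ; (hi << 32) | zext(lo)
  ;   vmslt.vv v0, v25, v8     ; "splat < vec" is "vec > scalar", operands swapped
  %m = call <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i64.i64(
    <vscale x 1 x i64> %vec,
    i64 %scalar,
    i32 %vl)
  ret <vscale x 1 x i1> %m
}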
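
The RISCVISelLowering.cpp hunk is needed because, for these compare intrinsics, the node's result type is a mask (nxvXi1) rather than the vXi64 type the splat must be built in, so Op.getSimpleValueType() no longer gives the right VT; the operand just before the scalar (SplatOp - 1) is the vector being compared and carries the i64 element type. A sketch of the operand layout for the masked form used in the tests above follows; the role comments are my reading of the tests, not text from the patch.

declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i64.i64(
  <vscale x 1 x i1>,    ; merge operand (mask type, i1 elements)
  <vscale x 1 x i64>,   ; vector operand just before the scalar: supplies the i64 element type
  i64,                  ; scalar to splat (split across two GPRs on RV32)
  <vscale x 1 x i1>,    ; mask operand
  i32);                 ; vl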