From b8701990202cd305236401f76331d53e65953ef2 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 4 Feb 2021 12:07:59 +0000 Subject: [PATCH] [RISCV] Add patterns for scalable-vector fabs & fcopysign The patterns mostly follow the scalar counterparts, save for some extra optimizations to match the vector/scalar forms. The patch adds a DAGCombine for ISD::FCOPYSIGN to try and reorder ISD::FNEG around any ISD::FP_EXTEND or ISD::FP_ROUND of the second operand. This helps us achieve better codegen to match vfsgnjn. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D96028 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 27 + llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 23 +- llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll | 185 +++ llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll | 1465 ++++++++++++++++++++ 4 files changed, 1699 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 2a9ea89..e772246 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -497,6 +497,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); + setOperationAction(ISD::FCOPYSIGN, VT, Legal); }; if (Subtarget.hasStdExtZfh()) @@ -604,6 +605,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.hasStdExtZbp()) { setTargetDAGCombine(ISD::OR); } + if (Subtarget.hasStdExtV()) + setTargetDAGCombine(ISD::FCOPYSIGN); } EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, @@ -2966,6 +2969,30 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } break; } + case ISD::FCOPYSIGN: { + EVT VT = N->getValueType(0); + if (!VT.isVector()) + 
break; + // There is a form of VFSGNJ which injects the negated sign of its second + // operand. Try and bubble any FNEG up after the extend/round to produce + // this optimized pattern. Avoid modifying cases where FP_ROUND and + // TRUNC=1. + SDValue In2 = N->getOperand(1); + // Avoid cases where the extend/round has multiple uses, as duplicating + // those is typically more expensive than removing a fneg. + if (!In2.hasOneUse()) + break; + if (In2.getOpcode() != ISD::FP_EXTEND && + (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) + break; + In2 = In2.getOperand(0); + if (In2.getOpcode() != ISD::FNEG) + break; + SDLoc DL(N); + SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); + return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), + DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); + } } return SDValue(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 7424910..2c845af 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -642,11 +642,32 @@ foreach vti = AllFloatVectors in { (!cast<Instruction>("PseudoVFSQRT_V_"# vti.LMul.MX) vti.RegClass:$rs2, vti.AVL, vti.SEW)>; - // 14.10. Vector Floating-Point Sign-Injection Instructions + // 14.12. Vector Floating-Point Sign-Injection Instructions + def : Pat<(fabs (vti.Vector vti.RegClass:$rs)), + (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX) + vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.SEW)>; // Handle fneg with VFSGNJN using the same input for both operands. 
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)), (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX) vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.SEW)>; + + def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2))), + (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX) + vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.SEW)>; + def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), + (vti.Vector (splat_vector vti.ScalarRegClass:$rs2)))), + (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.SEW)>; + + def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), + (vti.Vector (fneg vti.RegClass:$rs2)))), + (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX) + vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.SEW)>; + def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), + (vti.Vector (fneg (splat_vector vti.ScalarRegClass:$rs2))))), + (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.SEW)>; } // 14.11. 
Vector Floating-Point Compare Instructions diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll new file mode 100644 index 0000000..314a757 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare @llvm.fabs.nxv1f16() + +define @vfabs_nxv1f16( %v) { +; CHECK-LABEL: vfabs_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv1f16( %v) + ret %r +} + +declare @llvm.fabs.nxv2f16() + +define @vfabs_nxv2f16( %v) { +; CHECK-LABEL: vfabs_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv2f16( %v) + ret %r +} + +declare @llvm.fabs.nxv4f16() + +define @vfabs_nxv4f16( %v) { +; CHECK-LABEL: vfabs_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv4f16( %v) + ret %r +} + +declare @llvm.fabs.nxv8f16() + +define @vfabs_nxv8f16( %v) { +; CHECK-LABEL: vfabs_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv8f16( %v) + ret %r +} + +declare @llvm.fabs.nxv16f16() + +define @vfabs_nxv16f16( %v) { +; CHECK-LABEL: vfabs_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv16f16( %v) + ret %r +} + +declare @llvm.fabs.nxv32f16() + 
+define @vfabs_nxv32f16( %v) { +; CHECK-LABEL: vfabs_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv32f16( %v) + ret %r +} + +declare @llvm.fabs.nxv1f32() + +define @vfabs_nxv1f32( %v) { +; CHECK-LABEL: vfabs_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv1f32( %v) + ret %r +} + +declare @llvm.fabs.nxv2f32() + +define @vfabs_nxv2f32( %v) { +; CHECK-LABEL: vfabs_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv2f32( %v) + ret %r +} + +declare @llvm.fabs.nxv4f32() + +define @vfabs_nxv4f32( %v) { +; CHECK-LABEL: vfabs_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv4f32( %v) + ret %r +} + +declare @llvm.fabs.nxv8f32() + +define @vfabs_nxv8f32( %v) { +; CHECK-LABEL: vfabs_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv8f32( %v) + ret %r +} + +declare @llvm.fabs.nxv16f32() + +define @vfabs_nxv16f32( %v) { +; CHECK-LABEL: vfabs_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv16f32( %v) + ret %r +} + +declare @llvm.fabs.nxv1f64() + +define @vfabs_nxv1f64( %v) { +; CHECK-LABEL: vfabs_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv1f64( %v) + ret %r +} + +declare @llvm.fabs.nxv2f64() + +define @vfabs_nxv2f64( %v) { +; CHECK-LABEL: vfabs_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: 
vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv2f64( %v) + ret %r +} + +declare @llvm.fabs.nxv4f64() + +define @vfabs_nxv4f64( %v) { +; CHECK-LABEL: vfabs_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv4f64( %v) + ret %r +} + +declare @llvm.fabs.nxv8f64() + +define @vfabs_nxv8f64( %v) { +; CHECK-LABEL: vfabs_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnjx.vv v8, v8, v8 +; CHECK-NEXT: ret + %r = call @llvm.fabs.nxv8f64( %v) + ret %r +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll new file mode 100644 index 0000000..9c7f426e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll @@ -0,0 +1,1465 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare @llvm.copysign.nxv1f16(, ) + +define @vfcopysign_vv_nxv1f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv1f16( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv1f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv1f16( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv1f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv1f16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv1f16( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv1f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv1f16( %vm, %n) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v9 +; CHECK-NEXT: vfsgnj.vv v8, v8, v25 +; CHECK-NEXT: ret + %e = fptrunc %vs to + %r = call @llvm.copysign.nxv1f16( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32( %vm, float %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v25 +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fptrunc %splat to + %r = call @llvm.copysign.nxv1f16( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v9 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v25 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv1f16( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32( %vm, float %s) { +; CHECK-LABEL: 
vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v25 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv1f16( %vm, %eneg) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v25, v9 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v25 +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %e = fptrunc %vs to + %r = call @llvm.copysign.nxv1f16( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64( %vm, double %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v26 +; CHECK-NEXT: vfsgnj.vv v8, v8, v25 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fptrunc %splat to + %r = call @llvm.copysign.nxv1f16( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v25, v9 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v25 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fptrunc %n to + %r = call 
@llvm.copysign.nxv1f16( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64( %vm, double %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v26 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v25 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv1f16( %vm, %eneg) + ret %r +} + +declare @llvm.copysign.nxv2f16(, ) + +define @vfcopysign_vv_nxv2f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv2f16( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv2f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv2f16( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv2f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv2f16( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv2f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat 
+ %r = call @llvm.copysign.nxv2f16( %vm, %n) + ret %r +} + +declare @llvm.copysign.nxv4f16(, ) + +define @vfcopysign_vv_nxv4f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv4f16( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv4f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv4f16( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv4f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv4f16( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv4f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv4f16( %vm, %n) + ret %r +} + +declare @llvm.copysign.nxv8f16(, ) + +define @vfcopysign_vv_nxv8f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v10 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv8f16( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv8f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, 
zeroinitializer + %r = call @llvm.copysign.nxv8f16( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv8f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv8f16( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv8f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv8f16( %vm, %n) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v12 +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %e = fptrunc %vs to + %r = call @llvm.copysign.nxv8f16( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32( %vm, float %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfmv.v.f v28, fa0 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v28 +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fptrunc %splat to + %r = call @llvm.copysign.nxv8f16( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v12 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fptrunc %n to + %r = call 
@llvm.copysign.nxv8f16( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32( %vm, float %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfmv.v.f v28, fa0 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v28 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv8f16( %vm, %eneg) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v28, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v28 +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %e = fptrunc %vs to + %r = call @llvm.copysign.nxv8f16( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64( %vm, double %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v28, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v28 +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fptrunc %splat to + %r = call @llvm.copysign.nxv8f16( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v28, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: 
vfncvt.f.f.w v26, v28 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv8f16( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64( %vm, double %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v28, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v28 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv8f16( %vm, %eneg) + ret %r +} + +declare @llvm.copysign.nxv16f16(, ) + +define @vfcopysign_vv_nxv16f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v12 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv16f16( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv16f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv16f16( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv16f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v12 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv16f16( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv16f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 
+; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv16f16( %vm, %n) + ret %r +} + +declare @llvm.copysign.nxv32f16(, ) + +define @vfcopysign_vv_nxv32f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v16 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv32f16( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv32f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv32f16( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv32f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv32f16( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv32f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv32f16( %vm, %n) + ret %r +} + +declare @llvm.copysign.nxv1f32(, ) + +define @vfcopysign_vv_nxv1f32( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv1f32( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv1f32( %vm, float %s) { +; CHECK-LABEL: vfcopysign_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, 
e32,mf2,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv1f32( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv1f32( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv1f32( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv1f32( %vm, float %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv1f32( %vm, %n) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv1f32_nxv1f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv1f32_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v25, v9 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v25 +; CHECK-NEXT: ret + %e = fpext %vs to + %r = call @llvm.copysign.nxv1f32( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv1f32_nxv1f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv1f32_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fpext %splat to + %r = call @llvm.copysign.nxv1f32( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv1f32_nxv1f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv1f32_nxv1f16: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v25, v9 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v25 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fpext %n to + %r = call @llvm.copysign.nxv1f32( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv1f32_nxv1f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv1f32_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fpext %n to + %r = call @llvm.copysign.nxv1f32( %vm, %eneg) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv1f32_nxv1f64( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv1f32_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v9 +; CHECK-NEXT: vfsgnj.vv v8, v8, v25 +; CHECK-NEXT: ret + %e = fptrunc %vs to + %r = call @llvm.copysign.nxv1f32( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv1f32_nxv1f64( %vm, double %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv1f32_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v25 +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fptrunc %splat to + %r = call @llvm.copysign.nxv1f32( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv1f32_nxv1f64( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv1f32_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v9 +; 
CHECK-NEXT: vfsgnjn.vv v8, v8, v25 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv1f32( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv1f32_nxv1f64( %vm, double %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv1f32_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v25 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv1f32( %vm, %eneg) + ret %r +} + +declare @llvm.copysign.nxv2f32(, ) + +define @vfcopysign_vv_nxv2f32( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv2f32( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv2f32( %vm, float %s) { +; CHECK-LABEL: vfcopysign_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv2f32( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv2f32( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv2f32( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv2f32( %vm, float %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + 
%n = fneg %splat + %r = call @llvm.copysign.nxv2f32( %vm, %n) + ret %r +} + +declare @llvm.copysign.nxv4f32(, ) + +define @vfcopysign_vv_nxv4f32( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v10 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv4f32( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv4f32( %vm, float %s) { +; CHECK-LABEL: vfcopysign_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv4f32( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv4f32( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv4f32( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv4f32( %vm, float %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv4f32( %vm, %n) + ret %r +} + +declare @llvm.copysign.nxv8f32(, ) + +define @vfcopysign_vv_nxv8f32( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v12 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv8f32( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv8f32( %vm, float %s) { +; CHECK-LABEL: vfcopysign_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = 
shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv8f32( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv8f32( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v12 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv8f32( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv8f32( %vm, float %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv8f32( %vm, %n) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv8f32_nxv8f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f32_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v28, v12 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v28 +; CHECK-NEXT: ret + %e = fpext %vs to + %r = call @llvm.copysign.nxv8f32( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv8f32_nxv8f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv8f32_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v28, v26 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v28 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fpext %splat to + %r = call @llvm.copysign.nxv8f32( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv8f32_nxv8f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f32_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v28, v12 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu 
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v28 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fpext %n to + %r = call @llvm.copysign.nxv8f32( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv8f32_nxv8f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv8f32_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v28, v26 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v28 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fpext %n to + %r = call @llvm.copysign.nxv8f32( %vm, %eneg) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv8f32_nxv8f64( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f32_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v28, v16 +; CHECK-NEXT: vfsgnj.vv v8, v8, v28 +; CHECK-NEXT: ret + %e = fptrunc %vs to + %r = call @llvm.copysign.nxv8f32( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv8f32_nxv8f64( %vm, double %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv8f32_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v28, v16 +; CHECK-NEXT: vfsgnj.vv v8, v8, v28 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fptrunc %splat to + %r = call @llvm.copysign.nxv8f32( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv8f32_nxv8f64( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f32_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v28, v16 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v28 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv8f32( %vm, 
%eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv8f32_nxv8f64( %vm, double %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv8f32_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v28, v16 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v28 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fptrunc %n to + %r = call @llvm.copysign.nxv8f32( %vm, %eneg) + ret %r +} + +declare @llvm.copysign.nxv16f32(, ) + +define @vfcopysign_vv_nxv16f32( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v16 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv16f32( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv16f32( %vm, float %s) { +; CHECK-LABEL: vfcopysign_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv16f32( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv16f32( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv16f32( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv16f32( %vm, float %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv16f32( %vm, %n) + ret %r +} + +declare @llvm.copysign.nxv1f64(, ) + 
+define @vfcopysign_vv_nxv1f64( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv1f64( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv1f64( %vm, double %s) { +; CHECK-LABEL: vfcopysign_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv1f64( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv1f64( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv1f64( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv1f64( %vm, double %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv1f64( %vm, %n) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv1f64_nxv1f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv1f64_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v25, v9 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %e = fpext %vs to + %r = call @llvm.copysign.nxv1f64( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv1f64_nxv1f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv1f64_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: 
vfmv.v.f v25, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v25, v26 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v25 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fpext %splat to + %r = call @llvm.copysign.nxv1f64( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv1f64_nxv1f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv1f64_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v25, v9 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fpext %n to + %r = call @llvm.copysign.nxv1f64( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv1f64_nxv1f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv1f64_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v25, v26 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v25 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fpext %n to + %r = call @llvm.copysign.nxv1f64( %vm, %eneg) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv1f64_nxv1f32( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv1f64_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v25, v9 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v25 +; CHECK-NEXT: ret + %e = fpext %vs to + %r = call @llvm.copysign.nxv1f64( %vm, %e) + ret %r +} + +define 
@vfcopysign_exttrunc_vf_nxv1f64_nxv1f32( %vm, float %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv1f64_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fpext %splat to + %r = call @llvm.copysign.nxv1f64( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv1f64_nxv1f32( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv1f64_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v25, v9 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v25 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fpext %n to + %r = call @llvm.copysign.nxv1f64( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv1f64_nxv1f32( %vm, float %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv1f64_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v26 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fpext %n to + %r = call @llvm.copysign.nxv1f64( %vm, %eneg) + ret %r +} + +declare @llvm.copysign.nxv2f64(, ) + +define @vfcopysign_vv_nxv2f64( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v10 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv2f64( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv2f64( %vm, double %s) { +; CHECK-LABEL: vfcopysign_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; 
CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv2f64( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv2f64( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv2f64( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv2f64( %vm, double %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv2f64( %vm, %n) + ret %r +} + +declare @llvm.copysign.nxv4f64(, ) + +define @vfcopysign_vv_nxv4f64( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v12 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv4f64( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv4f64( %vm, double %s) { +; CHECK-LABEL: vfcopysign_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv4f64( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv4f64( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v12 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv4f64( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv4f64( %vm, double %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, 
e64,m4,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv4f64( %vm, %n) + ret %r +} + +declare @llvm.copysign.nxv8f64(, ) + +define @vfcopysign_vv_nxv8f64( %vm, %vs) { +; CHECK-LABEL: vfcopysign_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v16 +; CHECK-NEXT: ret + %r = call @llvm.copysign.nxv8f64( %vm, %vs) + ret %r +} + +define @vfcopysign_vf_nxv8f64( %vm, double %s) { +; CHECK-LABEL: vfcopysign_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %r = call @llvm.copysign.nxv8f64( %vm, %splat) + ret %r +} + +define @vfcopynsign_vv_nxv8f64( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 +; CHECK-NEXT: ret + %n = fneg %vs + %r = call @llvm.copysign.nxv8f64( %vm, %n) + ret %r +} + +define @vfcopynsign_vf_nxv8f64( %vm, double %s) { +; CHECK-LABEL: vfcopynsign_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret + %head = insertelement undef, double %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %r = call @llvm.copysign.nxv8f64( %vm, %n) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv8f64_nxv8f16( %vm, %vs) { +; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f64_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v28, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v16, v28 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v16 +; 
CHECK-NEXT: ret + %e = fpext %vs to + %r = call @llvm.copysign.nxv8f64( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv8f64_nxv8f16( %vm, half %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv8f64_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v28, v26 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v16, v28 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fpext %splat to + %r = call @llvm.copysign.nxv8f64( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv8f64_nxv8f16( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f64_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v28, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v16, v28 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fpext %n to + %r = call @llvm.copysign.nxv8f64( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv8f64_nxv8f16( %vm, half %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv8f64_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v28, v26 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v16, v28 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, half %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fpext %n to + %r = call @llvm.copysign.nxv8f64( %vm, %eneg) + ret %r +} + +define @vfcopysign_exttrunc_vv_nxv8f64_nxv8f32( %vm, %vs) { +; CHECK-LABEL: 
vfcopysign_exttrunc_vv_nxv8f64_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v24 +; CHECK-NEXT: ret + %e = fpext %vs to + %r = call @llvm.copysign.nxv8f64( %vm, %e) + ret %r +} + +define @vfcopysign_exttrunc_vf_nxv8f64_nxv8f32( %vm, float %s) { +; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv8f64_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfmv.v.f v28, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v16, v28 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnj.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %esplat = fpext %splat to + %r = call @llvm.copysign.nxv8f64( %vm, %esplat) + ret %r +} + +define @vfcopynsign_exttrunc_vv_nxv8f64_nxv8f32( %vm, %vs) { +; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f64_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v24 +; CHECK-NEXT: ret + %n = fneg %vs + %eneg = fpext %n to + %r = call @llvm.copysign.nxv8f64( %vm, %eneg) + ret %r +} + +define @vfcopynsign_exttrunc_vf_nxv8f64_nxv8f32( %vm, float %s) { +; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv8f64_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfmv.v.f v28, fa0 +; CHECK-NEXT: vfwcvt.f.f.v v16, v28 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, float %s, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %n = fneg %splat + %eneg = fpext %n to + %r = call @llvm.copysign.nxv8f64( %vm, %eneg) + ret %r +} -- 2.7.4