[RISCV] Define the vfsqrt RVV intrinsics

author Evandro Menezes <evandro.menezes@sifive.com>

Wed, 23 Dec 2020 06:43:15 +0000 (00:43 -0600)

committer Evandro Menezes <evandro.menezes@sifive.com>

Thu, 7 Jan 2021 23:29:29 +0000 (17:29 -0600)
author Evandro Menezes <evandro.menezes@sifive.com>
Wed, 23 Dec 2020 06:43:15 +0000 (00:43 -0600)
committer Evandro Menezes <evandro.menezes@sifive.com>
Thu, 7 Jan 2021 23:29:29 +0000 (17:29 -0600)
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td

index fe0d6b00a3c2e5cca899ec7987d4f9ebe70a696d..a28f8eb5ab085e04fb93e97adf2a36b58e97f338 100644 (file)
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -189,6 +189,19 @@ let TargetPrefix = "riscv" in {
                       LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
                       LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
                      [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+  // For destination vector type is the same as source vector.
+  // Input: (vector_in, vl)
+  class RISCVUnaryAANoMask
+        : Intrinsic<[llvm_anyvector_ty],
+                    [LLVMMatchType<0>, llvm_anyint_ty],
+                    [IntrNoMem]>, RISCVVIntrinsic;
+  // For destination vector type is the same as first source vector (with mask).
+  // Input: (maskedoff, vector_in, mask, vl)
+  class RISCVUnaryAAMask
+        : Intrinsic<[llvm_anyvector_ty],
+                    [LLVMMatchType<0>, LLVMMatchType<0>,
+                     LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+                    [IntrNoMem]>, RISCVVIntrinsic;
    // For destination vector type is the same as first and second source vector.
    // Input: (vector_in, vector_in, vl)
    class RISCVBinaryAAANoMask
@@ -210,7 +223,6 @@ let TargetPrefix = "riscv" in {
                      [IntrNoMem]>, RISCVVIntrinsic {
      let ExtendOperand = 2;
    }
-
    // For destination vector type is the same as first source vector (with mask).
    // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
    class RISCVBinaryAAXMask
@@ -326,7 +338,6 @@ let TargetPrefix = "riscv" in {
                      [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
      let ExtendOperand = 3;
    }
-
    class RISCVTernaryAAAXNoMask
          : Intrinsic<[llvm_anyvector_ty],
                      [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
@@ -470,7 +481,10 @@ let TargetPrefix = "riscv" in {
      def "int_riscv_" # NAME : RISCVIStore;
      def "int_riscv_" # NAME # "_mask" : RISCVIStoreMask;
    }
-
+  multiclass RISCVUnaryAA {
+    def "int_riscv_" # NAME : RISCVUnaryAANoMask;
+    def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMask;
+  }
    // AAX means the destination type(A) is the same as the first source
    // type(A). X means any type for the second source operand.
    multiclass RISCVBinaryAAX {
@@ -685,6 +699,8 @@ let TargetPrefix = "riscv" in {
    defm vfwmsac : RISCVTernaryWide;
    defm vfwnmsac : RISCVTernaryWide;
  
+  defm vfsqrt : RISCVUnaryAA;
+
    defm vfmin : RISCVBinaryAAX;
    defm vfmax : RISCVBinaryAAX;
  
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

index 62d887524950cd7e0cd766a1a58f8c5d0ab0e75d..2557b49f0c1c94ec64a1069d26d86cb60547e1bb 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1097,6 +1097,15 @@ multiclass VPseudoUnaryV_F_NoDummyMask {
    }
  }
  
+multiclass VPseudoUnaryV_V {
+  foreach m = MxList.m in {
+    let VLMul = m.value in {
+      def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
+      def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>;
+    }
+  }
+}
+
  multiclass PseudoUnaryV_VF2 {
    defvar constraints = "@earlyclobber $rd";
    foreach m = MxList.m[1-6] in
@@ -1688,6 +1697,18 @@ multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
     }
  }
  
+multiclass VPatUnaryV_V<string intrinsic, string instruction,
+                        list<VTypeInfo> vtilist> {
+  foreach vti = vtilist in {
+    def : VPatUnaryNoMask<intrinsic, instruction, "V",
+                          vti.Vector, vti.Vector,
+                          vti.SEW, vti.LMul, vti.RegClass>;
+    def : VPatUnaryMask<intrinsic, instruction, "V",
+                        vti.Vector, vti.Vector, vti.Mask,
+                        vti.SEW, vti.LMul, vti.RegClass, vti.RegClass>;
+  }
+}
+
  multiclass VPatNullaryV<string intrinsic, string instruction>
  {
    foreach vti = AllIntegerVectors in {
@@ -1712,7 +1733,6 @@ multiclass VPatNullaryM<string intrinsic, string inst> {
                          (NoX0 GPR:$vl), mti.SEW)>;
  }
  
-
  multiclass VPatBinary<string intrinsic,
                        string inst,
                        string kind,
@@ -2574,7 +2594,6 @@ defm PseudoVMERGE      : VPseudoBinaryV_VM_XM_IM;
  //===----------------------------------------------------------------------===//
  // 12.17. Vector Integer Move Instructions
  //===----------------------------------------------------------------------===//
-
  defm PseudoVMV_V       : VPseudoUnaryV_V_X_I_NoDummyMask;
  
  //===----------------------------------------------------------------------===//
@@ -2670,6 +2689,11 @@ defm PseudoVFWNMACC    : VPseudoTernaryW_VV_VX</*IsFloat*/true>;
  defm PseudoVFWMSAC     : VPseudoTernaryW_VV_VX</*IsFloat*/true>;
  defm PseudoVFWNMSAC    : VPseudoTernaryW_VV_VX</*IsFloat*/true>;
  
+//===----------------------------------------------------------------------===//
+// 14.8. Vector Floating-Point Square-Root Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVFSQRT      : VPseudoUnaryV_V;
+
  //===----------------------------------------------------------------------===//
  // 14.11. Vector Floating-Point Min/Max Instructions
  //===----------------------------------------------------------------------===//
@@ -3306,6 +3330,11 @@ defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmacc", "PseudoVFWNMACC", AllWidenabl
  defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwmsac", "PseudoVFWMSAC", AllWidenableFloatVectors>;
  defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmsac", "PseudoVFWNMSAC", AllWidenableFloatVectors>;
  
+//===----------------------------------------------------------------------===//
+// 14.8. Vector Floating-Point Square-Root Instruction
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryV_V<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors>;
+
  //===----------------------------------------------------------------------===//
  // 14.11. Vector Floating-Point Min/Max Instructions
  //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll

new file mode 100644 (file)

index 0000000..48c6dc9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
@@ -0,0 +1,512 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
+  <vscale x 1 x half>,
+  i32);
+
+define <vscale x 1 x half> @intrinsic_vfsqrt_v_nxv1f16_nxv1f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x half> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
+    <vscale x 1 x half> %0,
+    i32 %1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x half> @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x half> %0,
+  <vscale x 1 x half> %1,
+  <vscale x 1 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
+    <vscale x 1 x half> %0,
+    <vscale x 1 x half> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
+  <vscale x 2 x half>,
+  i32);
+
+define <vscale x 2 x half> @intrinsic_vfsqrt_v_nxv2f16_nxv2f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x half> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
+    <vscale x 2 x half> %0,
+    i32 %1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x half> @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x half> %0,
+  <vscale x 2 x half> %1,
+  <vscale x 2 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
+    <vscale x 2 x half> %0,
+    <vscale x 2 x half> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
+  <vscale x 4 x half>,
+  i32);
+
+define <vscale x 4 x half> @intrinsic_vfsqrt_v_nxv4f16_nxv4f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x half> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
+    <vscale x 4 x half> %0,
+    i32 %1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x half> @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x half> %0,
+  <vscale x 4 x half> %1,
+  <vscale x 4 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
+    <vscale x 4 x half> %0,
+    <vscale x 4 x half> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
+  <vscale x 8 x half>,
+  i32);
+
+define <vscale x 8 x half> @intrinsic_vfsqrt_v_nxv8f16_nxv8f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x half> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
+    <vscale x 8 x half> %0,
+    i32 %1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x half> @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x half> %0,
+  <vscale x 8 x half> %1,
+  <vscale x 8 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
+    <vscale x 8 x half> %0,
+    <vscale x 8 x half> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
+  <vscale x 16 x half>,
+  i32);
+
+define <vscale x 16 x half> @intrinsic_vfsqrt_v_nxv16f16_nxv16f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 16 x half> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
+    <vscale x 16 x half> %0,
+    i32 %1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x half> @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 16 x half> %0,
+  <vscale x 16 x half> %1,
+  <vscale x 16 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
+    <vscale x 16 x half> %0,
+    <vscale x 16 x half> %1,
+    <vscale x 16 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
+  <vscale x 32 x half>,
+  i32);
+
+define <vscale x 32 x half> @intrinsic_vfsqrt_v_nxv32f16_nxv32f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 32 x half> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
+    <vscale x 32 x half> %0,
+    i32 %1)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 32 x half> @intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 32 x half> %0,
+  <vscale x 32 x half> %1,
+  <vscale x 32 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
+    <vscale x 32 x half> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 32 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
+  <vscale x 1 x float>,
+  i32);
+
+define <vscale x 1 x float> @intrinsic_vfsqrt_v_nxv1f32_nxv1f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x float> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
+    <vscale x 1 x float> %0,
+    i32 %1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x float> @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x float> %0,
+  <vscale x 1 x float> %1,
+  <vscale x 1 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x float> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
+  <vscale x 2 x float>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfsqrt_v_nxv2f32_nxv2f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x float> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
+    <vscale x 2 x float> %0,
+    i32 %1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x float> %0,
+  <vscale x 2 x float> %1,
+  <vscale x 2 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x float> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
+  <vscale x 4 x float>,
+  i32);
+
+define <vscale x 4 x float> @intrinsic_vfsqrt_v_nxv4f32_nxv4f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x float> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
+    <vscale x 4 x float> %0,
+    i32 %1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x float> @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x float> %0,
+  <vscale x 4 x float> %1,
+  <vscale x 4 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x float> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
+  <vscale x 8 x float>,
+  i32);
+
+define <vscale x 8 x float> @intrinsic_vfsqrt_v_nxv8f32_nxv8f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x float> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
+    <vscale x 8 x float> %0,
+    i32 %1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x float> @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x float> %0,
+  <vscale x 8 x float> %1,
+  <vscale x 8 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
+  <vscale x 16 x float>,
+  i32);
+
+define <vscale x 16 x float> @intrinsic_vfsqrt_v_nxv16f32_nxv16f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 16 x float> %0,
+  i32 %1) nounwind {
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
+    <vscale x 16 x float> %0,
+    i32 %1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x float> @intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 16 x float> %0,
+  <vscale x 16 x float> %1,
+  <vscale x 16 x i1> %2,
+  i32 %3) nounwind {
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 16 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 16 x float> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll

new file mode 100644 (file)

index 0000000..0870693
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll
@@ -0,0 +1,698 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
+  <vscale x 1 x half>,
+  i64);
+
+define <vscale x 1 x half> @intrinsic_vfsqrt_v_nxv1f16_nxv1f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x half> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
+    <vscale x 1 x half> %0,
+    i64 %1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x half> @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x half> %0,
+  <vscale x 1 x half> %1,
+  <vscale x 1 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
+    <vscale x 1 x half> %0,
+    <vscale x 1 x half> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
+  <vscale x 2 x half>,
+  i64);
+
+define <vscale x 2 x half> @intrinsic_vfsqrt_v_nxv2f16_nxv2f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x half> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
+    <vscale x 2 x half> %0,
+    i64 %1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x half> @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x half> %0,
+  <vscale x 2 x half> %1,
+  <vscale x 2 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
+    <vscale x 2 x half> %0,
+    <vscale x 2 x half> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
+  <vscale x 4 x half>,
+  i64);
+
+define <vscale x 4 x half> @intrinsic_vfsqrt_v_nxv4f16_nxv4f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x half> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
+    <vscale x 4 x half> %0,
+    i64 %1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x half> @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x half> %0,
+  <vscale x 4 x half> %1,
+  <vscale x 4 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
+    <vscale x 4 x half> %0,
+    <vscale x 4 x half> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
+  <vscale x 8 x half>,
+  i64);
+
+define <vscale x 8 x half> @intrinsic_vfsqrt_v_nxv8f16_nxv8f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x half> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
+    <vscale x 8 x half> %0,
+    i64 %1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x half> @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x half> %0,
+  <vscale x 8 x half> %1,
+  <vscale x 8 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
+    <vscale x 8 x half> %0,
+    <vscale x 8 x half> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
+  <vscale x 16 x half>,
+  i64);
+
+define <vscale x 16 x half> @intrinsic_vfsqrt_v_nxv16f16_nxv16f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 16 x half> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
+    <vscale x 16 x half> %0,
+    i64 %1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x half> @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 16 x half> %0,
+  <vscale x 16 x half> %1,
+  <vscale x 16 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
+    <vscale x 16 x half> %0,
+    <vscale x 16 x half> %1,
+    <vscale x 16 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
+  <vscale x 32 x half>,
+  i64);
+
+define <vscale x 32 x half> @intrinsic_vfsqrt_v_nxv32f16_nxv32f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 32 x half> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
+    <vscale x 32 x half> %0,
+    i64 %1)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i64);
+
+define <vscale x 32 x half> @intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 32 x half> %0,
+  <vscale x 32 x half> %1,
+  <vscale x 32 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
+    <vscale x 32 x half> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 32 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
+  <vscale x 1 x float>,
+  i64);
+
+define <vscale x 1 x float> @intrinsic_vfsqrt_v_nxv1f32_nxv1f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x float> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
+    <vscale x 1 x float> %0,
+    i64 %1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x float> @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x float> %0,
+  <vscale x 1 x float> %1,
+  <vscale x 1 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x float> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+; Unmasked vfsqrt intrinsic for <vscale x 2 x float>: takes one source vector
+; and an i64 operand, and returns a vector of the same type.
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
+  <vscale x 2 x float>,
+  i64);
+
+; Forwards both arguments unchanged to the unmasked intrinsic and returns its
+; result. The expected assembly is a vsetvli with e32,m1,ta,mu (which consumes
+; the i64 argument in a0) followed by a single unmasked vfsqrt.v.
+define <vscale x 2 x float> @intrinsic_vfsqrt_v_nxv2f32_nxv2f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x float> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
+    <vscale x 2 x float> %0,
+    i64 %1)
+
+  ret <vscale x 2 x float> %a
+}
+
+; Masked vfsqrt intrinsic for <vscale x 2 x float>: two vector operands of the
+; result type, a <vscale x 2 x i1> mask, and an i64 operand.
+; NOTE(review): the first vector operand is presumably the merge (masked-off)
+; value and the second the actual source; confirm against the intrinsic
+; definition.
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i64);
+
+; Forwards all four arguments unchanged to the masked intrinsic. The expected
+; assembly uses a vsetvli with e32,m1,tu,mu and a vfsqrt.v predicated on v0.t
+; that reads v17 and writes v16.
+define <vscale x 2 x float> @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x float> %0,
+  <vscale x 2 x float> %1,
+  <vscale x 2 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x float> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+; Unmasked vfsqrt intrinsic for <vscale x 4 x float>: takes one source vector
+; and an i64 operand, and returns a vector of the same type.
+declare <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
+  <vscale x 4 x float>,
+  i64);
+
+; Forwards both arguments unchanged to the unmasked intrinsic and returns its
+; result. The expected assembly is a vsetvli with e32,m2,ta,mu (which consumes
+; the i64 argument in a0) followed by a single unmasked vfsqrt.v.
+define <vscale x 4 x float> @intrinsic_vfsqrt_v_nxv4f32_nxv4f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x float> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
+    <vscale x 4 x float> %0,
+    i64 %1)
+
+  ret <vscale x 4 x float> %a
+}
+
+; Masked vfsqrt intrinsic for <vscale x 4 x float>: two vector operands of the
+; result type, a <vscale x 4 x i1> mask, and an i64 operand.
+; NOTE(review): the first vector operand is presumably the merge (masked-off)
+; value and the second the actual source; confirm against the intrinsic
+; definition.
+declare <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i64);
+
+; Forwards all four arguments unchanged to the masked intrinsic. The expected
+; assembly uses a vsetvli with e32,m2,tu,mu and a vfsqrt.v predicated on v0.t
+; that reads v18 and writes v16.
+define <vscale x 4 x float> @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x float> %0,
+  <vscale x 4 x float> %1,
+  <vscale x 4 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x float> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+; Unmasked vfsqrt intrinsic for <vscale x 8 x float>: takes one source vector
+; and an i64 operand, and returns a vector of the same type.
+declare <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
+  <vscale x 8 x float>,
+  i64);
+
+; Forwards both arguments unchanged to the unmasked intrinsic and returns its
+; result. The expected assembly is a vsetvli with e32,m4,ta,mu (which consumes
+; the i64 argument in a0) followed by a single unmasked vfsqrt.v.
+define <vscale x 8 x float> @intrinsic_vfsqrt_v_nxv8f32_nxv8f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x float> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
+    <vscale x 8 x float> %0,
+    i64 %1)
+
+  ret <vscale x 8 x float> %a
+}
+
+; Masked vfsqrt intrinsic for <vscale x 8 x float>: two vector operands of the
+; result type, a <vscale x 8 x i1> mask, and an i64 operand.
+; NOTE(review): the first vector operand is presumably the merge (masked-off)
+; value and the second the actual source; confirm against the intrinsic
+; definition.
+declare <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i64);
+
+; Forwards all four arguments unchanged to the masked intrinsic. The expected
+; assembly uses a vsetvli with e32,m4,tu,mu and a vfsqrt.v predicated on v0.t
+; that reads v20 and writes v16.
+define <vscale x 8 x float> @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x float> %0,
+  <vscale x 8 x float> %1,
+  <vscale x 8 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+; Unmasked vfsqrt intrinsic for <vscale x 16 x float>: takes one source vector
+; and an i64 operand, and returns a vector of the same type.
+declare <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
+  <vscale x 16 x float>,
+  i64);
+
+; Forwards both arguments unchanged to the unmasked intrinsic and returns its
+; result. The expected assembly is a vsetvli with e32,m8,ta,mu (which consumes
+; the i64 argument in a0) followed by a single unmasked vfsqrt.v.
+define <vscale x 16 x float> @intrinsic_vfsqrt_v_nxv16f32_nxv16f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 16 x float> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
+    <vscale x 16 x float> %0,
+    i64 %1)
+
+  ret <vscale x 16 x float> %a
+}
+
+; Masked vfsqrt intrinsic for <vscale x 16 x float>: two vector operands of the
+; result type, a <vscale x 16 x i1> mask, and an i64 operand.
+; NOTE(review): the first vector operand is presumably the merge (masked-off)
+; value and the second the actual source; confirm against the intrinsic
+; definition.
+declare <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i64);
+
+; Forwards all four arguments unchanged to the masked intrinsic. Unlike the
+; smaller-LMUL masked cases, the expected assembly first loads a vector into v8
+; from the address in a0 (vle32.v under a vsetvli whose AVL register is zero),
+; then issues a second vsetvli with e32,m8,tu,mu taking the i64 argument from a1
+; before the vfsqrt.v predicated on v0.t that reads v8 and writes v16.
+; NOTE(review): presumably the second m8 vector argument is passed indirectly
+; through a0; confirm against the RVV calling convention used at this commit.
+define <vscale x 16 x float> @intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 16 x float> %0,
+  <vscale x 16 x float> %1,
+  <vscale x 16 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 16 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 16 x float> %a
+}
+
+; Unmasked vfsqrt intrinsic for <vscale x 1 x double>: takes one source vector
+; and an i64 operand, and returns a vector of the same type.
+declare <vscale x 1 x double> @llvm.riscv.vfsqrt.nxv1f64(
+  <vscale x 1 x double>,
+  i64);
+
+; Forwards both arguments unchanged to the unmasked intrinsic and returns its
+; result. The expected assembly is a vsetvli with e64,m1,ta,mu (which consumes
+; the i64 argument in a0) followed by a single unmasked vfsqrt.v.
+define <vscale x 1 x double> @intrinsic_vfsqrt_v_nxv1f64_nxv1f64(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x double> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfsqrt.nxv1f64(
+    <vscale x 1 x double> %0,
+    i64 %1)
+
+  ret <vscale x 1 x double> %a
+}
+
+; Masked vfsqrt intrinsic for <vscale x 1 x double>: two vector operands of the
+; result type, a <vscale x 1 x i1> mask, and an i64 operand.
+; NOTE(review): the first vector operand is presumably the merge (masked-off)
+; value and the second the actual source; confirm against the intrinsic
+; definition.
+declare <vscale x 1 x double> @llvm.riscv.vfsqrt.mask.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>,
+  <vscale x 1 x i1>,
+  i64);
+
+; Forwards all four arguments unchanged to the masked intrinsic. The expected
+; assembly uses a vsetvli with e64,m1,tu,mu and a vfsqrt.v predicated on v0.t
+; that reads v17 and writes v16.
+define <vscale x 1 x double> @intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 1 x double> %0,
+  <vscale x 1 x double> %1,
+  <vscale x 1 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfsqrt.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+; Unmasked vfsqrt intrinsic for <vscale x 2 x double>: takes one source vector
+; and an i64 operand, and returns a vector of the same type.
+declare <vscale x 2 x double> @llvm.riscv.vfsqrt.nxv2f64(
+  <vscale x 2 x double>,
+  i64);
+
+; Forwards both arguments unchanged to the unmasked intrinsic and returns its
+; result. The expected assembly is a vsetvli with e64,m2,ta,mu (which consumes
+; the i64 argument in a0) followed by a single unmasked vfsqrt.v.
+define <vscale x 2 x double> @intrinsic_vfsqrt_v_nxv2f64_nxv2f64(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x double> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfsqrt.nxv2f64(
+    <vscale x 2 x double> %0,
+    i64 %1)
+
+  ret <vscale x 2 x double> %a
+}
+
+; Masked vfsqrt intrinsic for <vscale x 2 x double>: two vector operands of the
+; result type, a <vscale x 2 x i1> mask, and an i64 operand.
+; NOTE(review): the first vector operand is presumably the merge (masked-off)
+; value and the second the actual source; confirm against the intrinsic
+; definition.
+declare <vscale x 2 x double> @llvm.riscv.vfsqrt.mask.nxv2f64(
+  <vscale x 2 x double>,
+  <vscale x 2 x double>,
+  <vscale x 2 x i1>,
+  i64);
+
+; Forwards all four arguments unchanged to the masked intrinsic. The expected
+; assembly uses a vsetvli with e64,m2,tu,mu and a vfsqrt.v predicated on v0.t
+; that reads v18 and writes v16.
+define <vscale x 2 x double> @intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 2 x double> %0,
+  <vscale x 2 x double> %1,
+  <vscale x 2 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfsqrt.mask.nxv2f64(
+    <vscale x 2 x double> %0,
+    <vscale x 2 x double> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x double> %a
+}
+
+; Unmasked vfsqrt intrinsic for <vscale x 4 x double>: takes one source vector
+; and an i64 operand, and returns a vector of the same type.
+declare <vscale x 4 x double> @llvm.riscv.vfsqrt.nxv4f64(
+  <vscale x 4 x double>,
+  i64);
+
+; Forwards both arguments unchanged to the unmasked intrinsic and returns its
+; result. The expected assembly is a vsetvli with e64,m4,ta,mu (which consumes
+; the i64 argument in a0) followed by a single unmasked vfsqrt.v.
+define <vscale x 4 x double> @intrinsic_vfsqrt_v_nxv4f64_nxv4f64(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x double> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfsqrt.nxv4f64(
+    <vscale x 4 x double> %0,
+    i64 %1)
+
+  ret <vscale x 4 x double> %a
+}
+
+; Masked vfsqrt intrinsic for <vscale x 4 x double>: two vector operands of the
+; result type, a <vscale x 4 x i1> mask, and an i64 operand.
+; NOTE(review): the first vector operand is presumably the merge (masked-off)
+; value and the second the actual source; confirm against the intrinsic
+; definition.
+declare <vscale x 4 x double> @llvm.riscv.vfsqrt.mask.nxv4f64(
+  <vscale x 4 x double>,
+  <vscale x 4 x double>,
+  <vscale x 4 x i1>,
+  i64);
+
+; Forwards all four arguments unchanged to the masked intrinsic. The expected
+; assembly uses a vsetvli with e64,m4,tu,mu and a vfsqrt.v predicated on v0.t
+; that reads v20 and writes v16.
+define <vscale x 4 x double> @intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 4 x double> %0,
+  <vscale x 4 x double> %1,
+  <vscale x 4 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfsqrt.mask.nxv4f64(
+    <vscale x 4 x double> %0,
+    <vscale x 4 x double> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x double> %a
+}
+
+; Unmasked vfsqrt intrinsic for <vscale x 8 x double>: takes one source vector
+; and an i64 operand, and returns a vector of the same type.
+declare <vscale x 8 x double> @llvm.riscv.vfsqrt.nxv8f64(
+  <vscale x 8 x double>,
+  i64);
+
+; Forwards both arguments unchanged to the unmasked intrinsic and returns its
+; result. The expected assembly is a vsetvli with e64,m8,ta,mu (which consumes
+; the i64 argument in a0) followed by a single unmasked vfsqrt.v.
+define <vscale x 8 x double> @intrinsic_vfsqrt_v_nxv8f64_nxv8f64(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x double> %0,
+  i64 %1) nounwind {
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfsqrt.nxv8f64(
+    <vscale x 8 x double> %0,
+    i64 %1)
+
+  ret <vscale x 8 x double> %a
+}
+
+; Masked vfsqrt intrinsic for <vscale x 8 x double>: two vector operands of the
+; result type, a <vscale x 8 x i1> mask, and an i64 operand.
+; NOTE(review): the first vector operand is presumably the merge (masked-off)
+; value and the second the actual source; confirm against the intrinsic
+; definition.
+declare <vscale x 8 x double> @llvm.riscv.vfsqrt.mask.nxv8f64(
+  <vscale x 8 x double>,
+  <vscale x 8 x double>,
+  <vscale x 8 x i1>,
+  i64);
+
+; Forwards all four arguments unchanged to the masked intrinsic. Unlike the
+; smaller-LMUL masked cases, the expected assembly first loads a vector into v8
+; from the address in a0 (vle64.v under a vsetvli whose AVL register is zero),
+; then issues a second vsetvli with e64,m8,tu,mu taking the i64 argument from a1
+; before the vfsqrt.v predicated on v0.t that reads v8 and writes v16.
+; NOTE(review): presumably the second m8 vector argument is passed indirectly
+; through a0; confirm against the RVV calling convention used at this commit.
+define <vscale x 8 x double> @intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT:    vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+  <vscale x 8 x double> %0,
+  <vscale x 8 x double> %1,
+  <vscale x 8 x i1> %2,
+  i64 %3) nounwind {
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfsqrt.mask.nxv8f64(
+    <vscale x 8 x double> %0,
+    <vscale x 8 x double> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x double> %a
+}
author	Evandro Menezes <evandro.menezes@sifive.com>
	Wed, 23 Dec 2020 06:43:15 +0000 (00:43 -0600)
committer	Evandro Menezes <evandro.menezes@sifive.com>
	Thu, 7 Jan 2021 23:29:29 +0000 (17:29 -0600)
llvm/include/llvm/IR/IntrinsicsRISCV.td		patch \| blob \| history
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td		patch \| blob \| history
llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll	[new file with mode: 0644]	patch \| blob