[AArch64] Add missing intrinsics for vrnd
authorJingu Kang <jingu.kang@arm.com>
Thu, 4 Mar 2021 14:40:01 +0000 (14:40 +0000)
committerJingu Kang <jingu.kang@arm.com>
Fri, 5 Mar 2021 11:26:12 +0000 (11:26 +0000)
clang/include/clang/Basic/arm_neon.td
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c [new file with mode: 0644]
clang/test/Preprocessor/aarch64-target-features.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll [new file with mode: 0644]

index 5360729..6e3ed6e 100644 (file)
@@ -1210,6 +1210,13 @@ def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">;
 def FRINTI_S64 : SInst<"vrndi", "..", "dQd">;
 }
 
+let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_FRINT)" in {
+def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">;
+def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
+def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;
+def FRINT64Z_S32 : SInst<"vrnd64z", "..", "fQf">;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // MaxNum/MinNum Floating Point
 
index f171346..5b7981e 100644 (file)
@@ -182,6 +182,7 @@ void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts,
 
 void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts,
                                                 MacroBuilder &Builder) const {
+  Builder.defineMacro("__ARM_FEATURE_FRINT", "1");
   // Also include the Armv8.4 defines
   getTargetDefinesARMV84A(Opts, Builder);
 }
index 3571716..eb5c430 100644 (file)
@@ -5823,6 +5823,14 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
+  NEONMAP1(vrnd32x_v, aarch64_neon_frint32x, Add1ArgType),
+  NEONMAP1(vrnd32xq_v, aarch64_neon_frint32x, Add1ArgType),
+  NEONMAP1(vrnd32z_v, aarch64_neon_frint32z, Add1ArgType),
+  NEONMAP1(vrnd32zq_v, aarch64_neon_frint32z, Add1ArgType),
+  NEONMAP1(vrnd64x_v, aarch64_neon_frint64x, Add1ArgType),
+  NEONMAP1(vrnd64xq_v, aarch64_neon_frint64x, Add1ArgType),
+  NEONMAP1(vrnd64z_v, aarch64_neon_frint64z, Add1ArgType),
+  NEONMAP1(vrnd64zq_v, aarch64_neon_frint64z, Add1ArgType),
   NEONMAP0(vrndi_v),
   NEONMAP0(vrndiq_v),
   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
@@ -10539,6 +10547,30 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
               : Intrinsic::trunc;
     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
   }
+  case NEON::BI__builtin_neon_vrnd32x_v:
+  case NEON::BI__builtin_neon_vrnd32xq_v: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::aarch64_neon_frint32x;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
+  }
+  case NEON::BI__builtin_neon_vrnd32z_v:
+  case NEON::BI__builtin_neon_vrnd32zq_v: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::aarch64_neon_frint32z;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
+  }
+  case NEON::BI__builtin_neon_vrnd64x_v:
+  case NEON::BI__builtin_neon_vrnd64xq_v: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::aarch64_neon_frint64x;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
+  }
+  case NEON::BI__builtin_neon_vrnd64z_v:
+  case NEON::BI__builtin_neon_vrnd64zq_v: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::aarch64_neon_frint64z;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
+  }
   case NEON::BI__builtin_neon_vrnd_v:
   case NEON::BI__builtin_neon_vrndq_v: {
     Int = Builder.getIsFPConstrained()
diff --git a/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c b/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
new file mode 100644 (file)
index 0000000..a0694ee
--- /dev/null
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +v8.5a\
+// RUN: -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -mem2reg \
+// RUN: | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: test_vrnd32x_f32
+// CHECK:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float> %a)
+// CHECK:  ret <2 x float> [[RND]]
+float32x2_t test_vrnd32x_f32(float32x2_t a) {
+  return vrnd32x_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd32xq_f32
+// CHECK:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float> %a)
+// CHECK:  ret <4 x float> [[RND]]
+float32x4_t test_vrnd32xq_f32(float32x4_t a) {
+  return vrnd32xq_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd32z_f32
+// CHECK:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float> %a)
+// CHECK:  ret <2 x float> [[RND]]
+float32x2_t test_vrnd32z_f32(float32x2_t a) {
+  return vrnd32z_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd32zq_f32
+// CHECK:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float> %a)
+// CHECK:  ret <4 x float> [[RND]]
+float32x4_t test_vrnd32zq_f32(float32x4_t a) {
+  return vrnd32zq_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd64x_f32
+// CHECK:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> %a)
+// CHECK:  ret <2 x float> [[RND]]
+float32x2_t test_vrnd64x_f32(float32x2_t a) {
+  return vrnd64x_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd64xq_f32
+// CHECK:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> %a)
+// CHECK:  ret <4 x float> [[RND]]
+float32x4_t test_vrnd64xq_f32(float32x4_t a) {
+  return vrnd64xq_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd64z_f32
+// CHECK:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> %a)
+// CHECK:  ret <2 x float> [[RND]]
+float32x2_t test_vrnd64z_f32(float32x2_t a) {
+  return vrnd64z_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd64zq_f32
+// CHECK:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
+// CHECK:  ret <4 x float> [[RND]]
+float32x4_t test_vrnd64zq_f32(float32x4_t a) {
+  return vrnd64zq_f32(a);
+}
index ceda05d..430508c 100644 (file)
 // RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+crypto -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRYPTO %s
 // CHECK-CRYPTO: __ARM_FEATURE_CRYPTO 1
 
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.5-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-8_5 %s
+// CHECK-8_5: __ARM_FEATURE_FRINT 1
+
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.4-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-8_4 %s
+// CHECK-8_4-NOT: __ARM_FEATURE_FRINT 1
+
 // RUN: %clang -target aarch64-none-linux-gnu -mcrc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s
 // RUN: %clang -target arm64-none-linux-gnu -mcrc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s
 // RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+crc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s
index 4d9a068..0fb0214 100644 (file)
@@ -462,6 +462,12 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
   // intrinsic.
   def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
 
+  // v8.5-A Vector FP Rounding
+  def int_aarch64_neon_frint32x : AdvSIMD_1FloatArg_Intrinsic;
+  def int_aarch64_neon_frint32z : AdvSIMD_1FloatArg_Intrinsic;
+  def int_aarch64_neon_frint64x : AdvSIMD_1FloatArg_Intrinsic;
+  def int_aarch64_neon_frint64z : AdvSIMD_1FloatArg_Intrinsic;
+
   // Scalar FP->Int conversions
 
   // Vector FP Inexact Narrowing
@@ -475,7 +481,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
   def int_aarch64_neon_udot : AdvSIMD_Dot_Intrinsic;
   def int_aarch64_neon_sdot : AdvSIMD_Dot_Intrinsic;
 
-// v8.6-A Matrix Multiply Intrinsics
+  // v8.6-A Matrix Multiply Intrinsics
   def int_aarch64_neon_ummla : AdvSIMD_MatMul_Intrinsic;
   def int_aarch64_neon_smmla : AdvSIMD_MatMul_Intrinsic;
   def int_aarch64_neon_usmmla : AdvSIMD_MatMul_Intrinsic;
index 05d3e88..e9f2940 100644 (file)
@@ -4091,10 +4091,10 @@ defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
 defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
 
 let Predicates = [HasFRInt3264] in {
-  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z">;
-  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z">;
-  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x">;
-  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x">;
+  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
+  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
+  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
+  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
 } // HasFRInt3264
 
 defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
diff --git a/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll b/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
new file mode 100644 (file)
index 0000000..3a4dd73
--- /dev/null
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.5a  | FileCheck %s
+
+declare <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float>)
+declare <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float>)
+declare <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float>)
+declare <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float>)
+
+define dso_local <2 x float> @t_vrnd32x_f32(<2 x float> %a) {
+; CHECK-LABEL: t_vrnd32x_f32:
+; CHECK:         frint32x v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float> %a)
+  ret <2 x float> %val
+}
+
+define dso_local <4 x float> @t_vrnd32xq_f32(<4 x float> %a) {
+; CHECK-LABEL: t_vrnd32xq_f32:
+; CHECK:         frint32x v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float> %a)
+  ret <4 x float> %val
+}
+
+define dso_local <2 x float> @t_vrnd32z_f32(<2 x float> %a) {
+; CHECK-LABEL: t_vrnd32z_f32:
+; CHECK:         frint32z v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float> %a)
+  ret <2 x float> %val
+}
+
+define dso_local <4 x float> @t_vrnd32zq_f32(<4 x float> %a) {
+; CHECK-LABEL: t_vrnd32zq_f32:
+; CHECK:         frint32z v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float> %a)
+  ret <4 x float> %val
+}
+
+declare <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float>)
+declare <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float>)
+declare <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float>)
+declare <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float>)
+
+define dso_local <2 x float> @t_vrnd64x_f32(<2 x float> %a) {
+; CHECK-LABEL: t_vrnd64x_f32:
+; CHECK:         frint64x v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> %a)
+  ret <2 x float> %val
+}
+
+define dso_local <4 x float> @t_vrnd64xq_f32(<4 x float> %a) {
+; CHECK-LABEL: t_vrnd64xq_f32:
+; CHECK:         frint64x v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> %a)
+  ret <4 x float> %val
+}
+
+define dso_local <2 x float> @t_vrnd64z_f32(<2 x float> %a) {
+; CHECK-LABEL: t_vrnd64z_f32:
+; CHECK:         frint64z v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> %a)
+  ret <2 x float> %val
+}
+
+define dso_local <4 x float> @t_vrnd64zq_f32(<4 x float> %a) {
+; CHECK-LABEL: t_vrnd64zq_f32:
+; CHECK:         frint64z v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
+  ret <4 x float> %val
+}