aarch64: Add Armv8.6 SVE bfloat16 support

author Richard Sandiford <richard.sandiford@arm.com>

Thu, 30 Jan 2020 15:46:28 +0000 (15:46 +0000)

committer Richard Sandiford <richard.sandiford@arm.com>

Fri, 31 Jan 2020 17:40:40 +0000 (17:40 +0000)
author Richard Sandiford <richard.sandiford@arm.com>
Thu, 30 Jan 2020 15:46:28 +0000 (15:46 +0000)
committer Richard Sandiford <richard.sandiford@arm.com>
Fri, 31 Jan 2020 17:40:40 +0000 (17:40 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index d10ae92..234e328 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,51 @@
  2020-01-31  Richard Sandiford  <richard.sandiford@arm.com>
  
+       * config/aarch64/aarch64.h (TARGET_SVE_BF16): New macro.
+       * config/aarch64/aarch64-sve-builtins-sve2.h (svcvtnt): Move to
+       aarch64-sve-builtins-base.h.
+       * config/aarch64/aarch64-sve-builtins-sve2.cc (svcvtnt): Move to
+       aarch64-sve-builtins-base.cc.
+       * config/aarch64/aarch64-sve-builtins-base.h (svbfdot, svbfdot_lane)
+       (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
+       (svcvtnt): Declare.
+       * config/aarch64/aarch64-sve-builtins-base.cc (svbfdot, svbfdot_lane)
+       (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
+       (svcvtnt): New functions.
+       * config/aarch64/aarch64-sve-builtins-base.def (svbfdot, svbfdot_lane)
+       (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
+       (svcvtnt): New functions.
+       (svcvt): Add a form that converts f32 to bf16.
+       * config/aarch64/aarch64-sve-builtins-shapes.h (ternary_bfloat)
+       (ternary_bfloat_lane, ternary_bfloat_lanex2, ternary_bfloat_opt_n):
+       Declare.
+       * config/aarch64/aarch64-sve-builtins-shapes.cc (parse_element_type):
+       Treat B as bfloat16_t.
+       (ternary_bfloat_lane_base): New class.
+       (ternary_bfloat_def): Likewise.
+       (ternary_bfloat): New shape.
+       (ternary_bfloat_lane_def): New class.
+       (ternary_bfloat_lane): New shape.
+       (ternary_bfloat_lanex2_def): New class.
+       (ternary_bfloat_lanex2): New shape.
+       (ternary_bfloat_opt_n_def): New class.
+       (ternary_bfloat_opt_n): New shape.
+       * config/aarch64/aarch64-sve-builtins.cc (TYPES_cvt_bfloat): New macro.
+       * config/aarch64/aarch64-sve.md (@aarch64_sve_<sve_fp_op>vnx4sf)
+       (@aarch64_sve_<sve_fp_op>_lanevnx4sf): New patterns.
+       (@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>)
+       (@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise.
+       (*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise.
+       (@aarch64_sve_cvtnt<VNx8BF_ONLY:mode>): Likewise.
+       * config/aarch64/aarch64-sve2.md (@aarch64_sve2_cvtnt<mode>): Key
+       the pattern off the narrow mode instead of the wider one.
+       * config/aarch64/iterators.md (VNx8BF_ONLY): New mode iterator.
+       (UNSPEC_BFMLALB, UNSPEC_BFMLALT, UNSPEC_BFMMLA): New unspecs.
+       (sve_fp_op): Handle them.
+       (SVE_BFLOAT_TERNARY_LONG): New int itertor.
+       (SVE_BFLOAT_TERNARY_LONG_LANE): Likewise.
+
+2020-01-31  Richard Sandiford  <richard.sandiford@arm.com>
+
         * config/aarch64/arm_sve.h: Include arm_bf16.h.
         * config/aarch64/aarch64-modes.def (BF): Move definition before
         VECTOR_MODES.  Remove separate VECTOR_MODES for V4BF and V8BF.
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc

index 9ae143c..9b63ea7 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2544,6 +2544,16 @@ FUNCTION (svandv, reduction, (UNSPEC_ANDV))
  FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
  FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
  FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
+FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
+FUNCTION (svbfdot_lane, fixed_insn_function,
+         (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
+FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
+FUNCTION (svbfmlalb_lane, fixed_insn_function,
+         (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
+FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
+FUNCTION (svbfmlalt_lane, fixed_insn_function,
+         (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
+FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
  FUNCTION (svbic, svbic_impl,)
  FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
  FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
@@ -2592,6 +2602,7 @@ FUNCTION (svcreate2, svcreate_impl, (2))
  FUNCTION (svcreate3, svcreate_impl, (3))
  FUNCTION (svcreate4, svcreate_impl, (4))
  FUNCTION (svcvt, svcvt_impl,)
+FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
  FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
  FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
  FUNCTION (svdot, svdot_impl,)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def

index 332555b..27ab05d 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -318,6 +318,18 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none)
  DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none)
  #undef REQUIRED_EXTENSIONS
  
+#define REQUIRED_EXTENSIONS AARCH64_FL_BF16
+DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none)
+DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz)
+DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx)
+#undef REQUIRED_EXTENSIONS
+
  #define REQUIRED_EXTENSIONS AARCH64_FL_I8MM
  DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none)
  DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h

index 5c19b7d..957ace8 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h
@@ -42,6 +42,13 @@ namespace aarch64_sve
      extern const function_base *const svasr;
      extern const function_base *const svasr_wide;
      extern const function_base *const svasrd;
+    extern const function_base *const svbfdot;
+    extern const function_base *const svbfdot_lane;
+    extern const function_base *const svbfmlalb;
+    extern const function_base *const svbfmlalb_lane;
+    extern const function_base *const svbfmlalt;
+    extern const function_base *const svbfmlalt_lane;
+    extern const function_base *const svbfmmla;
      extern const function_base *const svbic;
      extern const function_base *const svbrka;
      extern const function_base *const svbrkb;
@@ -84,6 +91,7 @@ namespace aarch64_sve
      extern const function_base *const svcreate3;
      extern const function_base *const svcreate4;
      extern const function_base *const svcvt;
+    extern const function_base *const svcvtnt;
      extern const function_base *const svdiv;
      extern const function_base *const svdivr;
      extern const function_base *const svdot;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc

index 1ea3caa..5f8c85d 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -78,6 +78,7 @@ apply_predication (const function_instance &instance, tree return_type,
     [01]    - the element type in type suffix 0 or 1 of INSTANCE
     f<bits> - a floating-point type with the given number of bits
     f[01]   - a floating-point type with the same width as type suffix 0 or 1
+   B       - bfloat16_t
     h<elt>  - a half-sized version of <elt>
     p       - a predicate (represented as TYPE_SUFFIX_b)
     q<elt>  - a quarter-sized version of <elt>
@@ -117,6 +118,9 @@ parse_element_type (const function_instance &instance, const char *&format)
    if (ch == 'p')
      return TYPE_SUFFIX_b;
  
+  if (ch == 'B')
+    return TYPE_SUFFIX_bf16;
+
    if (ch == 'q')
      {
        type_suffix_index suffix = parse_element_type (instance, format);
@@ -921,6 +925,26 @@ struct ternary_resize2_lane_base : public overloaded_base<0>
    }
  };
  
+/* A specialization of ternary_resize2_lane_base for bfloat16 elements,
+   indexed in groups of N elements.  */
+template<unsigned int N>
+struct ternary_bfloat_lane_base
+  : public ternary_resize2_lane_base<16, TYPE_bfloat, TYPE_bfloat>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const OVERRIDE
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,v0,vB,vB,su64", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const OVERRIDE
+  {
+    return c.require_immediate_lane_index (3, N);
+  }
+};
+
  /* A specialization of ternary_resize2_lane_base for quarter-sized
     elements.  */
  template<type_class_index TYPE_CLASS2 = function_resolver::SAME_TYPE_CLASS,
@@ -2695,6 +2719,48 @@ struct tbl_tuple_def : public overloaded_base<0>
  };
  SHAPE (tbl_tuple)
  
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloatt16_t, svbfloat16_t).  */
+struct ternary_bfloat_def
+  : public ternary_resize2_base<16, TYPE_bfloat, TYPE_bfloat>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const OVERRIDE
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,v0,vB,vB", group, MODE_none);
+  }
+};
+SHAPE (ternary_bfloat)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t)
+
+   where the final argument is an integer constant expression in the range
+   [0, 7].  */
+typedef ternary_bfloat_lane_base<1> ternary_bfloat_lane_def;
+SHAPE (ternary_bfloat_lane)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t)
+
+   where the final argument is an integer constant expression in the range
+   [0, 3].  */
+typedef ternary_bfloat_lane_base<2> ternary_bfloat_lanex2_def;
+SHAPE (ternary_bfloat_lanex2)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloatt16_t, svbfloat16_t)
+   sv<t0>_t svfoo[_n_t0](sv<t0>_t, svbfloat16_t, bfloat16_t).  */
+struct ternary_bfloat_opt_n_def
+  : public ternary_resize2_opt_n_base<16, TYPE_bfloat, TYPE_bfloat>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const OVERRIDE
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,v0,vB,vB", group, MODE_none);
+    build_all (b, "v0,v0,vB,sB", group, MODE_n);
+  }
+};
+SHAPE (ternary_bfloat_opt_n)
+
  /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:int:quarter>_t, sv<t0:uint:quarter>_t,
                        uint64_t)
  
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h

index 1ce0997..3a19982 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -148,6 +148,10 @@ namespace aarch64_sve
      extern const function_shape *const store_scatter_offset;
      extern const function_shape *const store_scatter_offset_restricted;
      extern const function_shape *const tbl_tuple;
+    extern const function_shape *const ternary_bfloat;
+    extern const function_shape *const ternary_bfloat_lane;
+    extern const function_shape *const ternary_bfloat_lanex2;
+    extern const function_shape *const ternary_bfloat_opt_n;
      extern const function_shape *const ternary_intq_uintq_lane;
      extern const function_shape *const ternary_intq_uintq_opt_n;
      extern const function_shape *const ternary_lane;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc

index 53b1651..9e7219c 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -487,7 +487,6 @@ FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),)
  FUNCTION (svcdot, svcdot_impl,)
  FUNCTION (svcdot_lane, svcdot_lane_impl,)
  FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
-FUNCTION (svcvtnt, CODE_FOR_MODE1 (aarch64_sve2_cvtnt),)
  FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
  FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),)
  FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h

index 90e29fc..06d4a93 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -54,7 +54,6 @@ namespace aarch64_sve
      extern const function_base *const svcdot;
      extern const function_base *const svcdot_lane;
      extern const function_base *const svcvtlt;
-    extern const function_base *const svcvtnt;
      extern const function_base *const svcvtx;
      extern const function_base *const svcvtxnt;
      extern const function_base *const sveor3;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc

index d4d201d..2c5543b 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -354,6 +354,10 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
    D (u32, f16), D (u32, f32), D (u32, f64), \
    D (u64, f16), D (u64, f32), D (u64, f64)
  
+/* _bf16_f32.  */
+#define TYPES_cvt_bfloat(S, D) \
+  D (bf16, f32)
+
  /* _f32_f16
     _f64_f32.  */
  #define TYPES_cvt_long(S, D) \
@@ -471,6 +475,7 @@ DEF_SVE_TYPES_ARRAY (d_unsigned);
  DEF_SVE_TYPES_ARRAY (d_integer);
  DEF_SVE_TYPES_ARRAY (d_data);
  DEF_SVE_TYPES_ARRAY (cvt);
+DEF_SVE_TYPES_ARRAY (cvt_bfloat);
  DEF_SVE_TYPES_ARRAY (cvt_long);
  DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
  DEF_SVE_TYPES_ARRAY (cvt_narrow);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md

index abaac11..fa38529 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -115,6 +115,7 @@
  ;; ---- [FP] General ternary arithmetic corresponding to unspecs
  ;; ---- [FP] Complex multiply-add
  ;; ---- [FP] Trigonometric multiply-add
+;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
  ;; ---- [FP] Matrix multiply-accumulate
  ;;
  ;; == Comparisons and selects
@@ -150,6 +151,7 @@
  ;; ---- [FP<-INT] Packs
  ;; ---- [FP<-INT] Unpacks
  ;; ---- [FP<-FP] Packs
+;; ---- [FP<-FP] Packs (bfloat16)
  ;; ---- [FP<-FP] Unpacks
  ;; ---- [PRED<-PRED] Packs
  ;; ---- [PRED<-PRED] Unpacks
@@ -6548,6 +6550,46 @@
  )
  
  ;; -------------------------------------------------------------------------
+;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFDOT (BF16)
+;; - BFMLALB (BF16)
+;; - BFMLALT (BF16)
+;; - BFMMLA (BF16)
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
+  [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
+       (unspec:VNx4SF
+         [(match_operand:VNx4SF 1 "register_operand" "0, w")
+          (match_operand:VNx8BF 2 "register_operand" "w, w")
+          (match_operand:VNx8BF 3 "register_operand" "w, w")]
+         SVE_BFLOAT_TERNARY_LONG))]
+  "TARGET_SVE_BF16"
+  "@
+   <sve_fp_op>\t%0.s, %2.h, %3.h
+   movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; The immediate range is enforced before generating the instruction.
+(define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
+  [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
+       (unspec:VNx4SF
+         [(match_operand:VNx4SF 1 "register_operand" "0, w")
+          (match_operand:VNx8BF 2 "register_operand" "w, w")
+          (match_operand:VNx8BF 3 "register_operand" "y, y")
+          (match_operand:SI 4 "const_int_operand")]
+         SVE_BFLOAT_TERNARY_LONG_LANE))]
+  "TARGET_SVE_BF16"
+  "@
+   <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
+   movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
  ;; ---- [FP] Matrix multiply-accumulate
  ;; -------------------------------------------------------------------------
  ;; Includes:
@@ -8110,6 +8152,77 @@
  )
  
  ;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Packs (bfloat16)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFCVT (BF16)
+;; - BFCVTNT (BF16)
+;; -------------------------------------------------------------------------
+
+;; Predicated BFCVT.
+(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
+       (unspec:VNx8BF_ONLY
+         [(match_operand:VNx4BI 1 "register_operand" "Upl")
+          (match_operand:SI 3 "aarch64_sve_gp_strictness")
+          (match_operand:VNx4SF_ONLY 2 "register_operand" "w")]
+         SVE_COND_FCVT))]
+  "TARGET_SVE_BF16"
+  "bfcvt\t%0.h, %1/m, %2.s"
+)
+
+;; Predicated BFCVT with merging.
+(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
+       (unspec:VNx8BF_ONLY
+         [(match_operand:VNx4BI 1 "register_operand")
+          (unspec:VNx8BF_ONLY
+            [(match_dup 1)
+             (const_int SVE_STRICT_GP)
+             (match_operand:VNx4SF_ONLY 2 "register_operand")]
+            SVE_COND_FCVT)
+          (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
+         UNSPEC_SEL))]
+  "TARGET_SVE_BF16"
+)
+
+(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
+       (unspec:VNx8BF_ONLY
+         [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
+          (unspec:VNx8BF_ONLY
+            [(match_dup 1)
+             (match_operand:SI 4 "aarch64_sve_gp_strictness")
+             (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
+            SVE_COND_FCVT)
+          (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+         UNSPEC_SEL))]
+  "TARGET_SVE_BF16"
+  "@
+   bfcvt\t%0.h, %1/m, %2.s
+   movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
+   movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
+  [(set_attr "movprfx" "*,yes,yes")]
+)
+
+;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
+;; pair because the even elements always have to be supplied for active
+;; elements, even if the inactive elements don't matter.
+;;
+;; This instructions does not take MOVPRFX.
+(define_insn "@aarch64_sve_cvtnt<mode>"
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
+       (unspec:VNx8BF_ONLY
+         [(match_operand:VNx4BI 2 "register_operand" "Upl")
+          (const_int SVE_STRICT_GP)
+          (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
+          (match_operand:VNx4SF 3 "register_operand" "w")]
+         UNSPEC_COND_FCVTNT))]
+  "TARGET_SVE_BF16"
+  "bfcvtnt\t%0.h, %2/m, %3.s"
+)
+
+;; -------------------------------------------------------------------------
  ;; ---- [FP<-FP] Unpacks
  ;; -------------------------------------------------------------------------
  ;; Includes:
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md

index eaded5d..f82e60e 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1921,16 +1921,16 @@
  ;; elements, even if the inactive elements don't matter.
  ;;
  ;; These instructions do not take MOVPRFX.
-(define_insn "@aarch64_sve2_cvtnt<mode>"
-  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
-       (unspec:<VNARROW>
-         [(match_operand:<VPRED> 2 "register_operand" "Upl")
+(define_insn "@aarch64_sve_cvtnt<mode>"
+  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
+       (unspec:SVE_FULL_HSF
+         [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
            (const_int SVE_STRICT_GP)
-          (match_operand:<VNARROW> 1 "register_operand" "0")
-          (match_operand:SVE_FULL_SDF 3 "register_operand" "w")]
+          (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
+          (match_operand:<VWIDE> 3 "register_operand" "w")]
           UNSPEC_COND_FCVTNT))]
    "TARGET_SVE2"
-  "fcvtnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
+  "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
  )
  
  ;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h

index 043e26a..8f08bad 100644 (file)
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -362,6 +362,7 @@ extern unsigned aarch64_architecture_version;
  /* BF16 instructions are enabled through +bf16.  */
  #define TARGET_BF16_FP (AARCH64_ISA_BF16)
  #define TARGET_BF16_SIMD (AARCH64_ISA_BF16 && TARGET_SIMD)
+#define TARGET_SVE_BF16 (TARGET_SVE && AARCH64_ISA_BF16)
  
  /* Make sure this is always defined so we don't have to check for ifdefs
     but rather use normal ifs.  */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index d5b60e0..3e3fd9d 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -314,6 +314,7 @@
  ;; Iterators for single modes, for "@" patterns.
  (define_mode_iterator VNx16QI_ONLY [VNx16QI])
  (define_mode_iterator VNx8HI_ONLY [VNx8HI])
+(define_mode_iterator VNx8BF_ONLY [VNx8BF])
  (define_mode_iterator VNx4SI_ONLY [VNx4SI])
  (define_mode_iterator VNx4SF_ONLY [VNx4SF])
  (define_mode_iterator VNx2DI_ONLY [VNx2DI])
@@ -820,6 +821,9 @@
      UNSPEC_USDOT       ; Used in aarch64-simd.md.
      UNSPEC_SUDOT       ; Used in aarch64-simd.md.
      UNSPEC_BFDOT       ; Used in aarch64-simd.md.
+    UNSPEC_BFMLALB     ; Used in aarch64-sve.md.
+    UNSPEC_BFMLALT     ; Used in aarch64-sve.md.
+    UNSPEC_BFMMLA      ; Used in aarch64-sve.md.
  ])
  
  ;; ------------------------------------------------------------------
@@ -2235,6 +2239,15 @@
  
  (define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL])
  
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT
+                                             UNSPEC_BFMLALB
+                                             UNSPEC_BFMLALT
+                                             UNSPEC_BFMMLA])
+
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT
+                                                  UNSPEC_BFMLALB
+                                                  UNSPEC_BFMLALT])
+
  (define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV
                                         UNSPEC_IORV
                                         UNSPEC_SMAXV
@@ -3225,7 +3238,11 @@
                                   (UNSPEC_SQDMULLBT "sqdmlslbt")
                                   (UNSPEC_SQDMULLT "sqdmlslt")])
  
-(define_int_attr sve_fp_op [(UNSPEC_FRECPE "frecpe")
+(define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot")
+                           (UNSPEC_BFMLALB "bfmlalb")
+                           (UNSPEC_BFMLALT "bfmlalt")
+                           (UNSPEC_BFMMLA "bfmmla")
+                           (UNSPEC_FRECPE "frecpe")
                             (UNSPEC_FRECPS "frecps")
                             (UNSPEC_RSQRTE "frsqrte")
                             (UNSPEC_RSQRTS "frsqrts")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 5d002d9..b239734 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,26 @@
  2020-01-31  Richard Sandiford  <richard.sandiford@arm.com>
  
+       * lib/target-supports.exp (check_effective_target_aarch64_asm_bf16_ok):
+       New proc.
+       * gcc.target/aarch64/sve/acle/asm/bfdot_f32.c: New test.
+       * gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c: Likweise.
+       * gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c: Likweise.
+       * gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c: Likweise.
+       * gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c: Likweise.
+       * gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c: Likweise.
+       * gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c: Likweise.
+       * gcc.target/aarch64/sve/acle/asm/cvt_bf16.c: Likweise.
+       * gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c: Likweise.
+       * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c: Likweise.
+       * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c:
+       Likweise.
+       * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c:
+       Likweise.
+       * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c:
+       Likweise.
+
+2020-01-31  Richard Sandiford  <richard.sandiford@arm.com>
+
         * g++.target/aarch64/sve/acle/general-c++/mangle_1.C: Test mangling
         of svbfloat16_t.
         * g++.target/aarch64/sve/acle/general-c++/mangle_2.C: Likewise for
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c

new file mode 100644 (file)

index 0000000..376622d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfdot_f32_tied1:
+**     bfdot   z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfdot_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_f32 (z0, z4, z5),
+            z0 = svbfdot (z0, z4, z5))
+
+/*
+** bfdot_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfdot   z0\.s, \1\.h, z1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfdot_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfdot_f32 (z4, z0, z1),
+                z0_res = svbfdot (z4, z0, z1))
+
+/*
+** bfdot_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfdot   z0\.s, z1\.h, \1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfdot_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfdot_f32 (z4, z1, z0),
+                z0_res = svbfdot (z4, z1, z0))
+
+/*
+** bfdot_f32_untied:
+**     movprfx z0, z1
+**     bfdot   z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfdot_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_f32 (z1, z4, z5),
+            z0 = svbfdot (z1, z4, z5))
+
+/*
+** bfdot_h7_f32_tied1:
+**     mov     (z[0-9]+\.h), h7
+**     bfdot   z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfdot_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfdot_n_f32 (z0, z4, d7),
+             z0 = svbfdot (z0, z4, d7))
+
+/*
+** bfdot_h7_f32_untied:
+**     mov     (z[0-9]+\.h), h7
+**     movprfx z0, z1
+**     bfdot   z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfdot_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfdot_n_f32 (z1, z4, d7),
+             z0 = svbfdot (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c

new file mode 100644 (file)

index 0000000..0f624fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c
@@ -0,0 +1,86 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfdot_lane_0_f32_tied1:
+**     bfdot   z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfdot_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_lane_f32 (z0, z4, z5, 0),
+            z0 = svbfdot_lane (z0, z4, z5, 0))
+
+/*
+** bfdot_lane_0_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfdot   z0\.s, \1\.h, z1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfdot_lane_f32 (z4, z0, z1, 0),
+                z0_res = svbfdot_lane (z4, z0, z1, 0))
+
+/*
+** bfdot_lane_0_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfdot   z0\.s, z1\.h, \1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfdot_lane_f32 (z4, z1, z0, 0),
+                z0_res = svbfdot_lane (z4, z1, z0, 0))
+
+/*
+** bfdot_lane_0_f32_untied:
+**     movprfx z0, z1
+**     bfdot   z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfdot_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_lane_f32 (z1, z4, z5, 0),
+            z0 = svbfdot_lane (z1, z4, z5, 0))
+
+/*
+** bfdot_lane_1_f32:
+**     bfdot   z0\.s, z4\.h, z5\.h\[1\]
+**     ret
+*/
+TEST_DUAL_Z (bfdot_lane_1_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_lane_f32 (z0, z4, z5, 1),
+            z0 = svbfdot_lane (z0, z4, z5, 1))
+
+/*
+** bfdot_lane_3_f32:
+**     bfdot   z0\.s, z4\.h, z5\.h\[3\]
+**     ret
+*/
+TEST_DUAL_Z (bfdot_lane_3_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_lane_f32 (z0, z4, z5, 3),
+            z0 = svbfdot_lane (z0, z4, z5, 3))
+
+/*
+** bfdot_lane_z8_f32:
+**     str     d8, \[sp, -16\]!
+**     mov     (z[0-7])\.d, z8\.d
+**     bfdot   z0\.s, z1\.h, \1\.h\[1\]
+**     ldr     d8, \[sp\], 16
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfdot_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+                   z0 = svbfdot_lane_f32 (z0, z1, z8, 1),
+                   z0 = svbfdot_lane (z0, z1, z8, 1))
+
+/*
+** bfdot_lane_z16_f32:
+**     mov     (z[0-7])\.d, z16\.d
+**     bfdot   z0\.s, z1\.h, \1\.h\[1\]
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfdot_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+                   z0 = svbfdot_lane_f32 (z0, z1, z16, 1),
+                   z0 = svbfdot_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c

new file mode 100644 (file)

index 0000000..0f81011
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalb_f32_tied1:
+**     bfmlalb z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_f32 (z0, z4, z5),
+            z0 = svbfmlalb (z0, z4, z5))
+
+/*
+** bfmlalb_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalb z0\.s, \1\.h, z1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalb_f32 (z4, z0, z1),
+                z0_res = svbfmlalb (z4, z0, z1))
+
+/*
+** bfmlalb_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalb z0\.s, z1\.h, \1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalb_f32 (z4, z1, z0),
+                z0_res = svbfmlalb (z4, z1, z0))
+
+/*
+** bfmlalb_f32_untied:
+**     movprfx z0, z1
+**     bfmlalb z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_f32 (z1, z4, z5),
+            z0 = svbfmlalb (z1, z4, z5))
+
+/*
+** bfmlalb_h7_f32_tied1:
+**     mov     (z[0-9]+\.h), h7
+**     bfmlalb z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfmlalb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfmlalb_n_f32 (z0, z4, d7),
+             z0 = svbfmlalb (z0, z4, d7))
+
+/*
+** bfmlalb_h7_f32_untied:
+**     mov     (z[0-9]+\.h), h7
+**     movprfx z0, z1
+**     bfmlalb z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfmlalb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfmlalb_n_f32 (z1, z4, d7),
+             z0 = svbfmlalb (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c

new file mode 100644 (file)

index 0000000..b0ec088
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c
@@ -0,0 +1,86 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalb_lane_0_f32_tied1:
+**     bfmlalb z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_lane_f32 (z0, z4, z5, 0),
+            z0 = svbfmlalb_lane (z0, z4, z5, 0))
+
+/*
+** bfmlalb_lane_0_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalb z0\.s, \1\.h, z1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalb_lane_f32 (z4, z0, z1, 0),
+                z0_res = svbfmlalb_lane (z4, z0, z1, 0))
+
+/*
+** bfmlalb_lane_0_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalb z0\.s, z1\.h, \1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalb_lane_f32 (z4, z1, z0, 0),
+                z0_res = svbfmlalb_lane (z4, z1, z0, 0))
+
+/*
+** bfmlalb_lane_0_f32_untied:
+**     movprfx z0, z1
+**     bfmlalb z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_lane_f32 (z1, z4, z5, 0),
+            z0 = svbfmlalb_lane (z1, z4, z5, 0))
+
+/*
+** bfmlalb_lane_1_f32:
+**     bfmlalb z0\.s, z4\.h, z5\.h\[1\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_1_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_lane_f32 (z0, z4, z5, 1),
+            z0 = svbfmlalb_lane (z0, z4, z5, 1))
+
+/*
+** bfmlalb_lane_7_f32:
+**     bfmlalb z0\.s, z4\.h, z5\.h\[7\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_7_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_lane_f32 (z0, z4, z5, 7),
+            z0 = svbfmlalb_lane (z0, z4, z5, 7))
+
+/*
+** bfmlalb_lane_z8_f32:
+**     str     d8, \[sp, -16\]!
+**     mov     (z[0-7])\.d, z8\.d
+**     bfmlalb z0\.s, z1\.h, \1\.h\[1\]
+**     ldr     d8, \[sp\], 16
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfmlalb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+                   z0 = svbfmlalb_lane_f32 (z0, z1, z8, 1),
+                   z0 = svbfmlalb_lane (z0, z1, z8, 1))
+
+/*
+** bfmlalb_lane_z16_f32:
+**     mov     (z[0-7])\.d, z16\.d
+**     bfmlalb z0\.s, z1\.h, \1\.h\[1\]
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfmlalb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+                   z0 = svbfmlalb_lane_f32 (z0, z1, z16, 1),
+                   z0 = svbfmlalb_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c

new file mode 100644 (file)

index 0000000..2a583fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalt_f32_tied1:
+**     bfmlalt z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_f32 (z0, z4, z5),
+            z0 = svbfmlalt (z0, z4, z5))
+
+/*
+** bfmlalt_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalt z0\.s, \1\.h, z1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalt_f32 (z4, z0, z1),
+                z0_res = svbfmlalt (z4, z0, z1))
+
+/*
+** bfmlalt_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalt z0\.s, z1\.h, \1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalt_f32 (z4, z1, z0),
+                z0_res = svbfmlalt (z4, z1, z0))
+
+/*
+** bfmlalt_f32_untied:
+**     movprfx z0, z1
+**     bfmlalt z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_f32 (z1, z4, z5),
+            z0 = svbfmlalt (z1, z4, z5))
+
+/*
+** bfmlalt_h7_f32_tied1:
+**     mov     (z[0-9]+\.h), h7
+**     bfmlalt z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfmlalt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfmlalt_n_f32 (z0, z4, d7),
+             z0 = svbfmlalt (z0, z4, d7))
+
+/*
+** bfmlalt_h7_f32_untied:
+**     mov     (z[0-9]+\.h), h7
+**     movprfx z0, z1
+**     bfmlalt z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfmlalt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfmlalt_n_f32 (z1, z4, d7),
+             z0 = svbfmlalt (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c

new file mode 100644 (file)

index 0000000..3af3997
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c
@@ -0,0 +1,86 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalt_lane_0_f32_tied1:
+**     bfmlalt z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_lane_f32 (z0, z4, z5, 0),
+            z0 = svbfmlalt_lane (z0, z4, z5, 0))
+
+/*
+** bfmlalt_lane_0_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalt z0\.s, \1\.h, z1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalt_lane_f32 (z4, z0, z1, 0),
+                z0_res = svbfmlalt_lane (z4, z0, z1, 0))
+
+/*
+** bfmlalt_lane_0_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalt z0\.s, z1\.h, \1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalt_lane_f32 (z4, z1, z0, 0),
+                z0_res = svbfmlalt_lane (z4, z1, z0, 0))
+
+/*
+** bfmlalt_lane_0_f32_untied:
+**     movprfx z0, z1
+**     bfmlalt z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_lane_f32 (z1, z4, z5, 0),
+            z0 = svbfmlalt_lane (z1, z4, z5, 0))
+
+/*
+** bfmlalt_lane_1_f32:
+**     bfmlalt z0\.s, z4\.h, z5\.h\[1\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_1_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_lane_f32 (z0, z4, z5, 1),
+            z0 = svbfmlalt_lane (z0, z4, z5, 1))
+
+/*
+** bfmlalt_lane_7_f32:
+**     bfmlalt z0\.s, z4\.h, z5\.h\[7\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_7_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_lane_f32 (z0, z4, z5, 7),
+            z0 = svbfmlalt_lane (z0, z4, z5, 7))
+
+/*
+** bfmlalt_lane_z8_f32:
+**     str     d8, \[sp, -16\]!
+**     mov     (z[0-7])\.d, z8\.d
+**     bfmlalt z0\.s, z1\.h, \1\.h\[1\]
+**     ldr     d8, \[sp\], 16
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfmlalt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+                   z0 = svbfmlalt_lane_f32 (z0, z1, z8, 1),
+                   z0 = svbfmlalt_lane (z0, z1, z8, 1))
+
+/*
+** bfmlalt_lane_z16_f32:
+**     mov     (z[0-7])\.d, z16\.d
+**     bfmlalt z0\.s, z1\.h, \1\.h\[1\]
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfmlalt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+                   z0 = svbfmlalt_lane_f32 (z0, z1, z16, 1),
+                   z0 = svbfmlalt_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c

new file mode 100644 (file)

index 0000000..b1d98fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c
@@ -0,0 +1,46 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmmla_f32_tied1:
+**     bfmmla  z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmmla_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmmla_f32 (z0, z4, z5),
+            z0 = svbfmmla (z0, z4, z5))
+
+/*
+** bfmmla_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmmla  z0\.s, \1\.h, z1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmmla_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmmla_f32 (z4, z0, z1),
+                z0_res = svbfmmla (z4, z0, z1))
+
+/*
+** bfmmla_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmmla  z0\.s, z1\.h, \1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmmla_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmmla_f32 (z4, z1, z0),
+                z0_res = svbfmmla (z4, z1, z0))
+
+/*
+** bfmmla_f32_untied:
+**     movprfx z0, z1
+**     bfmmla  z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmmla_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmmla_f32 (z1, z4, z5),
+            z0 = svbfmmla (z1, z4, z5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c

new file mode 100644 (file)

index 0000000..52baa1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c
@@ -0,0 +1,96 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** cvt_bf16_f32_m_tied1:
+**     bfcvt   z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_m (z0, p0, z4),
+            z0 = svcvt_bf16_m (z0, p0, z4))
+
+/*
+** cvt_bf16_f32_m_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfcvt   z0\.h, p0/m, \1\.s
+**     ret
+*/
+TEST_DUAL_Z_REV (cvt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t,
+                z0_res = svcvt_bf16_f32_m (z4, p0, z0),
+                z0_res = svcvt_bf16_m (z4, p0, z0))
+
+/*
+** cvt_bf16_f32_m_untied:
+**     movprfx z0, z1
+**     bfcvt   z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_m (z1, p0, z4),
+            z0 = svcvt_bf16_m (z1, p0, z4))
+
+/*
+** cvt_bf16_f32_z_tied1:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0\.s, p0/z, \1\.s
+**     bfcvt   z0\.h, p0/m, \1\.s
+**     ret
+*/
+TEST_DUAL_Z_REV (cvt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t,
+                z0_res = svcvt_bf16_f32_z (p0, z0),
+                z0_res = svcvt_bf16_z (p0, z0))
+
+/*
+** cvt_bf16_f32_z_untied:
+**     movprfx z0\.s, p0/z, z4\.s
+**     bfcvt   z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_z (p0, z4),
+            z0 = svcvt_bf16_z (p0, z4))
+
+/*
+** cvt_bf16_f32_x_tied1:
+**     bfcvt   z0\.h, p0/m, z0\.s
+**     ret
+*/
+TEST_DUAL_Z_REV (cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+                z0_res = svcvt_bf16_f32_x (p0, z0),
+                z0_res = svcvt_bf16_x (p0, z0))
+
+/*
+** cvt_bf16_f32_x_untied:
+**     bfcvt   z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_x (p0, z4),
+            z0 = svcvt_bf16_x (p0, z4))
+
+/*
+** ptrue_cvt_bf16_f32_x_tied1:
+**     ...
+**     ptrue   p[0-9]+\.b[^\n]*
+**     ...
+**     ret
+*/
+TEST_DUAL_Z_REV (ptrue_cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+                z0_res = svcvt_bf16_f32_x (svptrue_b32 (), z0),
+                z0_res = svcvt_bf16_x (svptrue_b32 (), z0))
+
+/*
+** ptrue_cvt_bf16_f32_x_untied:
+**     ...
+**     ptrue   p[0-9]+\.b[^\n]*
+**     ...
+**     ret
+*/
+TEST_DUAL_Z (ptrue_cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_x (svptrue_b32 (), z4),
+            z0 = svcvt_bf16_x (svptrue_b32 (), z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c

new file mode 100644 (file)

index 0000000..54614c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c
@@ -0,0 +1,90 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** cvtnt_bf16_f32_m_tied1:
+**     bfcvtnt z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_m (z0, p0, z4),
+            z0 = svcvtnt_bf16_m (z0, p0, z4))
+
+/* Bad RA choice: no preferred output sequence.  */
+TEST_DUAL_Z_REV (cvtnt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t,
+                z0_res = svcvtnt_bf16_f32_m (z4, p0, z0),
+                z0_res = svcvtnt_bf16_m (z4, p0, z0))
+
+/*
+** cvtnt_bf16_f32_m_untied:
+** (
+**     mov     z0\.d, z1\.d
+**     bfcvtnt z0\.h, p0/m, z4\.s
+** |
+**     bfcvtnt z1\.h, p0/m, z4\.s
+**     mov     z0\.d, z1\.d
+** )
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_m (z1, p0, z4),
+            z0 = svcvtnt_bf16_m (z1, p0, z4))
+
+/*
+** cvtnt_bf16_f32_x_tied1:
+**     bfcvtnt z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_x (z0, p0, z4),
+            z0 = svcvtnt_bf16_x (z0, p0, z4))
+
+/* Bad RA choice: no preferred output sequence.  */
+TEST_DUAL_Z_REV (cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t,
+                z0_res = svcvtnt_bf16_f32_x (z4, p0, z0),
+                z0_res = svcvtnt_bf16_x (z4, p0, z0))
+
+/*
+** cvtnt_bf16_f32_x_untied:
+** (
+**     mov     z0\.d, z1\.d
+**     bfcvtnt z0\.h, p0/m, z4\.s
+** |
+**     bfcvtnt z1\.h, p0/m, z4\.s
+**     mov     z0\.d, z1\.d
+** )
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_x (z1, p0, z4),
+            z0 = svcvtnt_bf16_x (z1, p0, z4))
+
+/*
+** ptrue_cvtnt_bf16_f32_x_tied1:
+**     ...
+**     ptrue   p[0-9]+\.b[^\n]*
+**     ...
+**     ret
+*/
+TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_x (z0, svptrue_b32 (), z4),
+            z0 = svcvtnt_bf16_x (z0, svptrue_b32 (), z4))
+
+/* Bad RA choice: no preferred output sequence.  */
+TEST_DUAL_Z_REV (ptrue_cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t,
+                z0_res = svcvtnt_bf16_f32_x (z4, svptrue_b32 (), z0),
+                z0_res = svcvtnt_bf16_x (z4, svptrue_b32 (), z0))
+
+/*
+** ptrue_cvtnt_bf16_f32_x_untied:
+**     ...
+**     ptrue   p[0-9]+\.b[^\n]*
+**     ...
+**     ret
+*/
+TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_x (z1, svptrue_b32 (), z4),
+            z0 = svcvtnt_bf16_x (z1, svptrue_b32 (), z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c

new file mode 100644 (file)

index 0000000..a923332
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf)
+{
+  svbfmmla (f32, bf16); /* { dg-error {too few arguments to function 'svbfmmla'} } */
+  svbfmmla (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfmmla'} } */
+  svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE vector type} } */
+  svbfmmla (pg, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svbool_t' arguments} } */
+  svbfmmla (u8, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint8_t' arguments} } */
+  svbfmmla (u16, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint16_t' arguments} } */
+  svbfmmla (f64, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svfloat64_t' arguments} } */
+  svbfmmla (f32, bf16, bf16);
+  svbfmmla (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+  svbfmmla (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+  svbfmmla (f32, bf16, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+  svbfmmla (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+  svbfmmla (f32, bf16, bf); /* { dg-error {passing 'bfloat16_t'[^\n]* to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c

new file mode 100644 (file)

index 0000000..23f027f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i)
+{
+  svbfmlalb_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfmlalb_lane'} } */
+  svbfmlalb_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfmlalb_lane'} } */
+  svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE vector type} } */
+  svbfmlalb_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svbool_t' arguments} } */
+  svbfmlalb_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint8_t' arguments} } */
+  svbfmlalb_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint16_t' arguments} } */
+  svbfmlalb_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svfloat64_t' arguments} } */
+  svbfmlalb_lane (f32, bf16, bf16, 0);
+  svbfmlalb_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+  svbfmlalb_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+  svbfmlalb_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+  svbfmlalb_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+  svbfmlalb_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */
+  svbfmlalb_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */
+
+  svbfmlalb_lane (f32, bf16, bf16, 0);
+  svbfmlalb_lane (f32, bf16, bf16, 7);
+  svbfmlalb_lane (f32, bf16, bf16, 8); /* { dg-error {passing 8 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */
+  svbfmlalb_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c

new file mode 100644 (file)

index 0000000..4755ca7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i)
+{
+  svbfdot_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfdot_lane'} } */
+  svbfdot_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfdot_lane'} } */
+  svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE vector type} } */
+  svbfdot_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svbool_t' arguments} } */
+  svbfdot_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint8_t' arguments} } */
+  svbfdot_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint16_t' arguments} } */
+  svbfdot_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svfloat64_t' arguments} } */
+  svbfdot_lane (f32, bf16, bf16, 0);
+  svbfdot_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+  svbfdot_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+  svbfdot_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+  svbfdot_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+  svbfdot_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */
+  svbfdot_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */
+
+  svbfdot_lane (f32, bf16, bf16, 0);
+  svbfdot_lane (f32, bf16, bf16, 3);
+  svbfdot_lane (f32, bf16, bf16, 4); /* { dg-error {passing 4 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */
+  svbfdot_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c

new file mode 100644 (file)

index 0000000..2d09a8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf)
+{
+  svbfdot (f32, bf16); /* { dg-error {too few arguments to function 'svbfdot'} } */
+  svbfdot (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfdot'} } */
+  svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE vector type} } */
+  svbfdot (pg, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svbool_t' arguments} } */
+  svbfdot (u8, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint8_t' arguments} } */
+  svbfdot (u16, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint16_t' arguments} } */
+  svbfdot (f64, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svfloat64_t' arguments} } */
+  svbfdot (f32, bf16, bf16);
+  svbfdot (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */
+  svbfdot (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */
+  svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+  svbfdot (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot', which expects 'svbfloat16_t'} } */
+  svbfdot (f32, bf16, bf);
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp

index 6c06faf..5377d7b 100644 (file)
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8996,7 +8996,7 @@ proc check_effective_target_aarch64_tiny { } {
  # various architecture extensions via the .arch_extension pseudo-op.
  
  foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
-                         "i8mm" "f32mm" "f64mm" } {
+                         "i8mm" "f32mm" "f64mm" "bf16" } {
      eval [string map [list FUNC $aarch64_ext] {
         proc check_effective_target_aarch64_asm_FUNC_ok { } {
           if { [istarget aarch64*-*-*] } {
author	Richard Sandiford <richard.sandiford@arm.com>
	Thu, 30 Jan 2020 15:46:28 +0000 (15:46 +0000)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Fri, 31 Jan 2020 17:40:40 +0000 (17:40 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-sve-builtins-base.cc		patch \| blob \| history
gcc/config/aarch64/aarch64-sve-builtins-base.def		patch \| blob \| history
gcc/config/aarch64/aarch64-sve-builtins-base.h		patch \| blob \| history
gcc/config/aarch64/aarch64-sve-builtins-shapes.cc		patch \| blob \| history
gcc/config/aarch64/aarch64-sve-builtins-shapes.h		patch \| blob \| history
gcc/config/aarch64/aarch64-sve-builtins-sve2.cc		patch \| blob \| history
gcc/config/aarch64/aarch64-sve-builtins-sve2.h		patch \| blob \| history
gcc/config/aarch64/aarch64-sve-builtins.cc		patch \| blob \| history
gcc/config/aarch64/aarch64-sve.md		patch \| blob \| history
gcc/config/aarch64/aarch64-sve2.md		patch \| blob \| history
gcc/config/aarch64/aarch64.h		patch \| blob \| history
gcc/config/aarch64/iterators.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/lib/target-supports.exp		patch \| blob \| history