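This adds support for five new builtins for the NNPA facility (arch14): s390_vclfnhs, s390_vclfnls, s390_vcrnfs, s390_vcfn and s390_vcnf, together with the corresponding vecintrin.h macros.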
gcc/ChangeLog:
* config/s390/s390-builtin-types.def (BT_FN_V4SF_V8HI_UINT): New
builtin signature.
(BT_FN_V8HI_V8HI_UINT): Likewise.
(BT_FN_V8HI_V4SF_V4SF_UINT): Likewise.
* config/s390/s390-builtins.def (B_NNPA): New macro definition.
(s390_vclfnhs, s390_vclfnls, s390_vcrnfs, s390_vcfn, s390_vcnf):
New builtin definitions.
* config/s390/s390-c.c (s390_cpu_cpp_builtins_internal): Bump
vector extension version.
* config/s390/s390.c (s390_expand_builtin): Check if builtins are
available with current -march level.
* config/s390/s390.md (UNSPEC_NNPA_VCLFNHS_V8HI)
(UNSPEC_NNPA_VCLFNLS_V8HI, UNSPEC_NNPA_VCRNFS_V8HI)
(UNSPEC_NNPA_VCFN_V8HI, UNSPEC_NNPA_VCNF_V8HI): New constants.
* config/s390/vecintrin.h (vec_extend_to_fp32_hi): New macro.
(vec_extend_to_fp32_lo): Likewise.
(vec_round_from_fp32): Likewise.
(vec_convert_to_fp16): Likewise.
(vec_convert_from_fp16): Likewise.
* config/s390/vx-builtins.md (vclfnhs_v8hi): New insn pattern.
(vclfnls_v8hi): Likewise.
(vcrnfs_v8hi): Likewise.
(vcfn_v8hi): Likewise.
(vcnf_v8hi): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/s390/zvector/vec-nnpa-fp16-convert.c: New test.
* gcc.target/s390/zvector/vec-nnpa-fp32-convert-1.c: New test.
* gcc.target/s390/zvector/vec_convert_from_fp16.c: New test.
* gcc.target/s390/zvector/vec_convert_to_fp16.c: New test.
* gcc.target/s390/zvector/vec_extend_to_fp32_hi.c: New test.
* gcc.target/s390/zvector/vec_extend_to_fp32_lo.c: New test.
* gcc.target/s390/zvector/vec_round_from_fp32.c: New test.
DEF_FN_TYPE_2 (BT_FN_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT)
DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR)
DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF)
+DEF_FN_TYPE_2 (BT_FN_V4SF_V8HI_UINT, BT_V4SF, BT_V8HI, BT_UINT)
DEF_FN_TYPE_2 (BT_FN_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI)
DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDCONSTPTR, BT_V4SI, BT_INT, BT_VOIDCONSTPTR)
DEF_FN_TYPE_2 (BT_FN_V4SI_UV4SI_UV4SI, BT_V4SI, BT_UV4SI, BT_UV4SI)
DEF_FN_TYPE_2 (BT_FN_V8HI_UV8HI_UV8HI, BT_V8HI, BT_UV8HI, BT_UV8HI)
DEF_FN_TYPE_2 (BT_FN_V8HI_V16QI_V16QI, BT_V8HI, BT_V16QI, BT_V16QI)
DEF_FN_TYPE_2 (BT_FN_V8HI_V4SI_V4SI, BT_V8HI, BT_V4SI, BT_V4SI)
+DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI_UINT, BT_V8HI, BT_V8HI, BT_UINT)
DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI)
DEF_FN_TYPE_2 (BT_FN_VOID_UINT64PTR_UINT64, BT_VOID, BT_UINT64PTR, BT_UINT64)
DEF_FN_TYPE_2 (BT_FN_VOID_V2DF_FLTPTR, BT_VOID, BT_V2DF, BT_FLTPTR)
DEF_FN_TYPE_3 (BT_FN_V4SI_V8HI_V8HI_V4SI, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI)
DEF_FN_TYPE_3 (BT_FN_V8HI_UV8HI_UV8HI_INTPTR, BT_V8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_V8HI_V16QI_V16QI_V8HI, BT_V8HI, BT_V16QI, BT_V16QI, BT_V8HI)
+DEF_FN_TYPE_3 (BT_FN_V8HI_V4SF_V4SF_UINT, BT_V8HI, BT_V4SF, BT_V4SF, BT_UINT)
DEF_FN_TYPE_3 (BT_FN_V8HI_V4SI_V4SI_INTPTR, BT_V8HI, BT_V4SI, BT_V4SI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_V8HI_V8HI_V8HI_INTPTR, BT_V8HI, BT_V8HI, BT_V8HI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_V8HI_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_V8HI)
#undef B_VXE
#undef B_VXE2
#undef B_DEP
+#undef B_NNPA
#undef BFLAGS_MASK_INIT
#define BFLAGS_MASK_INIT (B_INT)
#define B_VXE (1 << 3) /* Builtins requiring the z14 vector extensions. */
#define B_VXE2 (1 << 4) /* Builtins requiring the z15 vector extensions. */
#define B_DEP (1 << 5) /* Builtin has been deprecated and a warning should be issued. */
+#define B_NNPA (1 << 6) /* Builtins requiring the NNPA Facility. */
/* B_DEF defines a standard (not overloaded) builtin
B_DEF (<builtin name>, <RTL expander name>, <function attributes>, <builtin flags>, <operand flags, see above>, <fntype>)
B_DEF (s390_vstrszb, vstrszv16qi, 0, B_VXE2, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
B_DEF (s390_vstrszh, vstrszv8hi, 0, B_VXE2, 0, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
B_DEF (s390_vstrszf, vstrszv4si, 0, B_VXE2, 0, BT_FN_UV4SI_UV4SI_UV4SI_UV8HI_INTPTR)
+
+/* arch 14 builtins: vector conversions flagged B_NNPA (builtins
+   requiring the NNPA Facility).  Each entry takes one constant operand
+   restricted by the *_U4 operand flag and expands through the
+   like-named *_v8hi insn pattern.  */
+
+B_DEF (s390_vclfnhs, vclfnhs_v8hi, 0, B_NNPA, O3_U4, BT_FN_V4SF_V8HI_UINT)
+B_DEF (s390_vclfnls, vclfnls_v8hi, 0, B_NNPA, O3_U4, BT_FN_V4SF_V8HI_UINT)
+
+B_DEF (s390_vcrnfs, vcrnfs_v8hi, 0, B_NNPA, O4_U4, BT_FN_V8HI_V4SF_V4SF_UINT)
+
+B_DEF (s390_vcfn, vcfn_v8hi, 0, B_NNPA, O3_U4, BT_FN_V8HI_V8HI_UINT)
+B_DEF (s390_vcnf, vcnf_v8hi, 0, B_NNPA, O3_U4, BT_FN_V8HI_V8HI_UINT)
s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_OPT_VX), old_opts,
opts, "__VX__", "__VX__");
s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_ZVECTOR), old_opts,
- opts, "__VEC__=10303", "__VEC__");
+ opts, "__VEC__=10304", "__VEC__");
s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_ZVECTOR), old_opts,
opts, "__vector=__attribute__((vector_size(16)))",
"__vector__");
error ("Builtin %qF requires z15 or higher.", fndecl);
return const0_rtx;
}
+
+ if ((bflags & B_NNPA) && !TARGET_NNPA)
+ {
+ error ("Builtin %qF requires arch14 or higher.", fndecl);
+ return const0_rtx;
+ }
}
if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
&& fcode < S390_ALL_BUILTIN_MAX)
UNSPEC_VEC_VFMAX
UNSPEC_VEC_ELTSWAP
+
+ UNSPEC_NNPA_VCLFNHS_V8HI
+ UNSPEC_NNPA_VCLFNLS_V8HI
+ UNSPEC_NNPA_VCRNFS_V8HI
+
+ UNSPEC_NNPA_VCFN_V8HI
+ UNSPEC_NNPA_VCNF_V8HI
])
;;
#define vec_vsterg vec_vlerh
#define vec_vsterf_flt vec_vlerf_flt
#define vec_vsterg_dbl vec_vlerg_dbl
+
+/* NNPA Facility conversions.  Each name is a plain alias for the
+   corresponding non-overloaded __builtin_s390_* builtin, so the
+   argument list (including the trailing constant) is passed through
+   unchanged.  */
+#define vec_extend_to_fp32_hi __builtin_s390_vclfnhs
+#define vec_extend_to_fp32_lo __builtin_s390_vclfnls
+#define vec_round_from_fp32 __builtin_s390_vcrnfs
+#define vec_convert_to_fp16 __builtin_s390_vcfn
+#define vec_convert_from_fp16 __builtin_s390_vcnf
#define vec_gather_element __builtin_s390_vec_gather_element
#define vec_xl __builtin_s390_vec_xl
#define vec_xld2 __builtin_s390_vec_xld2
"TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS (<V_HW_HSD:MODE>mode)"
"vstebr<bhfgq>\t%v1,%0,%2"
[(set_attr "op_type" "VRX")])
+
+
+;;
+;; NNPA Facility
+;;
+
+; Produce a V4SF result from elements 0-3 of the V8HI input using
+; vclfnh.  Operand 2 is a constant mask handed to the instruction
+; unchanged; the literal 2 in the template is a fixed field.
+; NOTE(review): the fixed 2 is presumably a format selector - confirm
+; against the instruction definition.
+(define_insn "vclfnhs_v8hi"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
+ (match_operand:QI 2 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCLFNHS_V8HI))]
+ "TARGET_NNPA"
+ "vclfnh\t%v0,%v1,2,%2"
+ [(set_attr "op_type" "VRR")])
+
+; Produce a V4SF result from elements 4-7 of the V8HI input using
+; vclfnl.  Operand 2 is a constant mask handed to the instruction
+; unchanged; the literal 2 in the template is a fixed field.
+; NOTE(review): the fixed 2 is presumably a format selector - confirm
+; against the instruction definition.
+(define_insn "vclfnls_v8hi"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
+ (match_operand:QI 2 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCLFNLS_V8HI))]
+ "TARGET_NNPA"
+ "vclfnl\t%v0,%v1,2,%2"
+ [(set_attr "op_type" "VRR")])
+
+; Combine two V4SF inputs (operands 1 and 2) into a single V8HI result
+; using vcrnf.  Operand 3 is a constant mask handed to the instruction
+; unchanged; the trailing literal 2 in the template is a fixed field.
+; NOTE(review): the fixed 2 is presumably a format selector - confirm
+; against the instruction definition.
+(define_insn "vcrnfs_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")
+ (match_operand:QI 3 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCRNFS_V8HI))]
+ "TARGET_NNPA"
+ "vcrnf\t%v0,%v1,%v2,%3,2"
+ [(set_attr "op_type" "VRR")])
+
+; V8HI to V8HI conversion using vcfn.  Operand 2 is a constant mask
+; handed to the instruction unchanged; in this template the fixed
+; literal 1 comes before the mask operand.
+(define_insn "vcfn_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:QI 2 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCFN_V8HI))]
+ "TARGET_NNPA"
+ "vcfn\t%v0,%v1,1,%2"
+ [(set_attr "op_type" "VRR")])
+
+; V8HI to V8HI conversion using vcnf.  Operand 2 is a constant mask
+; handed to the instruction unchanged; in this template the mask
+; operand comes before the fixed literal 1.
+(define_insn "vcnf_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:QI 2 "const_mask_operand" "C")]
+ UNSPEC_NNPA_VCNF_V8HI))]
+ "TARGET_NNPA"
+ "vcnf\t%v0,%v1,%2,1"
+ [(set_attr "op_type" "VRR")])
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target s390_nnpa } */
+/* { dg-options "-O3 -mzarch -march=arch14 -mzvector --save-temps" } */
+
+#include <vecintrin.h>
+
+/* Round-trip check.  Two float vectors are combined with
+   vec_round_from_fp32, passed through vec_convert_to_fp16 and
+   vec_convert_from_fp16, and then extended back to float with the
+   hi/lo variants.  The program aborts unless both results compare
+   equal to the original inputs, and returns 0 otherwise.  */
+int
+main ()
+{
+ vector float fp1 = (vector float){ 1.0f, 2.0f, 3.0f, 4.0f };
+ vector float fp2 = (vector float){ 5.0f, 6.0f, 7.0f, 8.0f };
+
+ /* fp1 and fp2 end up in a single vector of eight 16-bit elements.  */
+ vector short int tmp1 = vec_round_from_fp32 (fp1, fp2, 0);
+
+ /* Convert and convert back; tmp3 is expected to hold the same values
+ as tmp1 for the checks below to pass.  */
+ vector short int tmp2 = vec_convert_to_fp16 (tmp1, 0);
+ vector short int tmp3 = vec_convert_from_fp16 (tmp2, 0);
+
+ vector float fp1_ret = vec_extend_to_fp32_hi (tmp3, 0);
+ vector float fp2_ret = vec_extend_to_fp32_lo (tmp3, 0);
+
+ /* NOTE(review): exact equality presumably relies on the small integer
+ inputs being representable in every intermediate format - confirm.  */
+ if (vec_any_ne (fp1, fp1_ret))
+ __builtin_abort ();
+
+ if (vec_any_ne (fp2, fp2_ret))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "vcrnf\t" 1 } } */
+/* { dg-final { scan-assembler-times "vcfn\t" 1 } } */
+/* { dg-final { scan-assembler-times "vcnf\t" 1 } } */
+/* { dg-final { scan-assembler-times "vclfnh\t" 1 } } */
+/* { dg-final { scan-assembler-times "vclfnl\t" 1 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target s390_nnpa } */
+/* { dg-options "-O3 -mzarch -march=arch14 -mzvector --save-temps" } */
+
+#include <vecintrin.h>
+
+/* Round-trip check without the fp16 step.  Two float vectors are
+   combined with vec_round_from_fp32 and immediately extended back to
+   float with the hi/lo variants.  The program aborts unless both
+   results compare equal to the original inputs, and returns 0
+   otherwise.  */
+int
+main ()
+{
+ vector float fp1 = (vector float){ 1.0f, 2.0f, 3.0f, 4.0f };
+ vector float fp2 = (vector float){ 5.0f, 6.0f, 7.0f, 8.0f };
+ /* fp1 and fp2 end up in a single vector of eight 16-bit elements.  */
+ vector short int conv = vec_round_from_fp32 (fp1, fp2, 0);
+ vector float fp1_ret = vec_extend_to_fp32_hi (conv, 0);
+ vector float fp2_ret = vec_extend_to_fp32_lo (conv, 0);
+
+ /* NOTE(review): exact equality presumably relies on the small integer
+ inputs being representable in the intermediate format - confirm.  */
+ if (vec_any_ne (fp1, fp1_ret))
+ __builtin_abort ();
+
+ if (vec_any_ne (fp2, fp2_ret))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "vcrnf\t" 1 } } */
+/* { dg-final { scan-assembler-times "vclfnh\t" 1 } } */
+/* { dg-final { scan-assembler-times "vclfnl\t" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch14 -mzvector" } */
+
+#include <vecintrin.h>
+
+/* Single use of vec_convert_from_fp16 with a constant 0 as second
+   argument; the assembler scan following this function expects exactly
+   one vcnf instruction.  */
+vector short int
+test_vec_convert_from_fp16 (vector short int a)
+{
+ return vec_convert_from_fp16 (a, 0);
+}
+
+/* { dg-final { scan-assembler-times "vcnf\t" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch14 -mzvector" } */
+
+#include <vecintrin.h>
+
+/* Single use of vec_convert_to_fp16 with a constant 0 as second
+   argument; the assembler scan following this function expects exactly
+   one vcfn instruction.  */
+vector short int
+test_vec_convert_to_fp16 (vector short int a)
+{
+ return vec_convert_to_fp16 (a, 0);
+}
+
+/* { dg-final { scan-assembler-times "vcfn\t" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch14 -mzvector" } */
+
+#include <vecintrin.h>
+
+/* Single use of vec_extend_to_fp32_hi with a constant 0 as second
+   argument; the assembler scan following this function expects exactly
+   one vclfnh instruction.  */
+vector float
+test_vec_extend_to_fp32_hi (vector short int a)
+{
+ return vec_extend_to_fp32_hi (a, 0);
+}
+
+/* { dg-final { scan-assembler-times "vclfnh\t" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch14 -mzvector" } */
+
+#include <vecintrin.h>
+
+/* Single use of vec_extend_to_fp32_lo with a constant 0 as second
+   argument; the assembler scan following this function expects exactly
+   one vclfnl instruction.  */
+vector float
+test_vec_extend_to_fp32_lo (vector short int a)
+{
+ return vec_extend_to_fp32_lo (a, 0);
+}
+
+/* { dg-final { scan-assembler-times "vclfnl\t" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch14 -mzvector" } */
+
+#include <vecintrin.h>
+
+/* Single use of vec_round_from_fp32 on two float vectors with a
+   constant 0 as third argument; the assembler scan following this
+   function expects exactly one vcrnf instruction.  */
+vector short int
+test_vec_round_from_fp32 (vector float hi, vector float lo)
+{
+ return vec_round_from_fp32 (hi, lo, 0);
+}
+
+/* { dg-final { scan-assembler-times "vcrnf\t" 1 } } */