From 29e6733c20c98fe73c79ca9cac2dd758f3b3d67e Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Thu, 30 Jul 2009 20:48:17 +0000 Subject: [PATCH] Add patch 5/6 for full power7/VSX support Co-Authored-By: Pat Haugen Co-Authored-By: Revital Eres From-SVN: r150271 --- gcc/ChangeLog | 200 +++ gcc/config/rs6000/altivec.h | 59 +- gcc/config/rs6000/altivec.md | 67 +- gcc/config/rs6000/power7.md | 318 +++++ gcc/config/rs6000/ppc-asm.h | 139 +- gcc/config/rs6000/predicates.md | 10 + gcc/config/rs6000/rs6000-c.c | 242 +++- gcc/config/rs6000/rs6000.c | 676 +++++++++- gcc/config/rs6000/rs6000.h | 11 + gcc/config/rs6000/rs6000.md | 233 +++- gcc/config/rs6000/rs6000.opt | 4 + gcc/config/rs6000/t-rs6000 | 2 + gcc/config/rs6000/vector.md | 388 ++++-- gcc/config/rs6000/vsx.md | 1339 ++++++++++++++++++++ gcc/doc/extend.texi | 105 +- gcc/doc/invoke.texi | 24 +- gcc/doc/md.texi | 17 +- gcc/testsuite/ChangeLog | 50 + gcc/testsuite/gcc.dg/optimize-bswapdi-1.c | 2 +- gcc/testsuite/gcc.dg/vmx/vmx.exp | 2 +- gcc/testsuite/gcc.target/powerpc/altivec-32.c | 59 + gcc/testsuite/gcc.target/powerpc/altivec-6.c | 4 +- gcc/testsuite/gcc.target/powerpc/bswap-run.c | 102 ++ gcc/testsuite/gcc.target/powerpc/bswap16.c | 8 + gcc/testsuite/gcc.target/powerpc/bswap32.c | 8 + gcc/testsuite/gcc.target/powerpc/bswap64-1.c | 9 + gcc/testsuite/gcc.target/powerpc/bswap64-2.c | 10 + gcc/testsuite/gcc.target/powerpc/bswap64-3.c | 10 + .../gcc.target/powerpc/optimize-bswapdi-2.c | 36 + .../gcc.target/powerpc/optimize-bswapdi-3.c | 36 + .../gcc.target/powerpc/optimize-bswapsi-2.c | 55 + gcc/testsuite/gcc.target/powerpc/popcount-2.c | 9 + gcc/testsuite/gcc.target/powerpc/popcount-3.c | 9 + gcc/testsuite/gcc.target/powerpc/pr39457.c | 56 + gcc/testsuite/gcc.target/powerpc/vsx-builtin-1.c | 38 + gcc/testsuite/gcc.target/powerpc/vsx-builtin-2.c | 38 + gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c | 212 ++++ gcc/testsuite/gcc.target/powerpc/vsx-builtin-4.c | 142 +++ 
gcc/testsuite/gcc.target/powerpc/vsx-builtin-5.c | 14 + gcc/testsuite/gcc.target/powerpc/vsx-builtin-6.c | 146 +++ gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c | 150 +++ gcc/testsuite/gcc.target/powerpc/vsx-vector-1.c | 152 +++ gcc/testsuite/gcc.target/powerpc/vsx-vector-2.c | 152 +++ gcc/testsuite/gcc.target/powerpc/vsx-vector-3.c | 48 + gcc/testsuite/gcc.target/powerpc/vsx-vector-4.c | 48 + gcc/testsuite/gcc.target/powerpc/vsx-vector-5.c | 392 ++++++ gcc/testsuite/gcc.target/powerpc/vsx-vector-6.c | 81 ++ gcc/testsuite/lib/target-supports.exp | 60 +- 48 files changed, 5733 insertions(+), 239 deletions(-) create mode 100644 gcc/config/rs6000/power7.md create mode 100644 gcc/config/rs6000/vsx.md create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-32.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap-run.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap16.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap32.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap64-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap64-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap64-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/optimize-bswapdi-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/optimize-bswapdi-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/optimize-bswapsi-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/popcount-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/popcount-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr39457.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-6.c create mode 100644 
gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4784578..4358c84 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,203 @@ +2009-07-30 Michael Meissner + Pat Haugen + Revital Eres + + * config/rs6000/vector.md (VEC_F): Add VSX support. + (VEC_A): Ditto. + (VEC_N): Ditto. + (mov): Ditto. + (vector_load_): Ditto. + (vector_store_): Ditto. + (vector GPR move split): Ditto. + (vec_reload_and_plus_): Ditto. + (vec_reload_and_reg_): Ditto. + (add3): Ditto. + (sub3): Ditto. + (mul3): Ditto. + (neg2): Ditto. + (abs2): Ditto. + (smin3): Ditto. + (smax3): Ditto. + (vector_eq): Ditto. + (vector_gt): Ditto. + (vector_ge): Ditto. + (vector_gtu): Ditto. + (vector_select__uns): Ditto. + (vector_eq__p): Ditto. + (vector_gt__p): Ditto. + (vector_ge__p): Ditto. + (vector_gtu__p): Ditto. + (cr6_test_for_zero): Ditto. + (cr6_test_for_zero_reverse): Ditto. + (cr6_test_for_lt): Ditto. + (cr6_test_for_lt_reverse): Ditto. + (xor3): Ditto. + (ior3): Ditto. + (and3): Ditto. + (one_cmpl2): Ditto. + (nor2): Ditto. + (andc2): Ditto. + (float2): Ditto. + (unsigned_float2): Ditto. + (fix_trunc2): Ditto. + (fixuns_trunc2): Ditto. + (vec_init): + (vec_set): Ditto. + (vec_extract): Ditto. + (vec_interleave_highv4sf): Ditto. + (vec_interleave_lowv4sf): Ditto. + (vec_realign_load_): Ditto. + (vec_shl_): Ditto. + (vec_shr_): Ditto. + (div3): New patterns for VSX. + (vec_interleave_highv2df): Ditto. + (vec_interleave_lowv2df): Ditto. + (vec_pack_trunc_v2df): Ditto. + (vec_pack_sfix_trunc_v2df): Ditto. + (vec_pack_ufix_trunc_v2df): Ditto. 
+ (vec_unpacks_hi_v4sf): Ditto. + (vec_unpacks_lo_v4sf): Ditto. + (vec_unpacks_float_hi_v4si): Ditto. + (vec_unpacku_float_lo_v4si): Ditto. + (vec_unpacku_float_hi_v4si): Ditto. + (vec_unpacks_float_lo_v4si): Ditto. + (movmisalign): Ditto. + (vector_ceil2): New patterns for vectorizing math library. + (vector_floor2): Ditto. + (vector_btrunc2): Ditto. + (vector_copysign3): Ditto. + + * config/rs6000/predicates.md (easy_vector_constant_msb): New + predicate for setting the high bit in each word, used for + copysign. + + * config/rs6000/ppc-asm.h (f19): Whitespace. + (f32-f63): Define if VSX. + (v0-v31): Define if Altivec. + (vs0-vs63): Define if VSX. + + * config/rs6000/t-rs6000 (MD_INCLUDES): Add power7.md and vsx.md. + + * config/rs6000/power7.md: New file, provide tuning parameters for + -mcpu=power7. + + * config/rs6000/rs6000-c.c (rs6000_macro_to_expand): Add VSX + support. + (rs6000_cpu_cpp_builtins): Ditto. + (altivec_overloaded_builtins): Ditto. + (altivec_resolve_overloaded_builtin): Ditto. + + * config/rs6000/rs6000.opt (-mno-vectorize-builtins): Add new + debug switch to disable vectorizing simple math builtin + functions. + + * config/rs6000/rs6000.c (rs6000_builtin_vectorized_function): + Vectorize simple math builtin functions. + (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define target + hook to vectorize math builtins. + (rs6000_override_options): Enable -mvsx on -mcpu=power7. + (rs6000_builtin_conversion): Add VSX/power7 support. + (rs6000_builtin_vec_perm): Ditto. + (vsplits_constant): Add support for loading up a vector constant + with just the high bit set in each part. + (rs6000_expand_vector_init): Add VSX/power7 support. + (rs6000_expand_vector_set): Ditto. + (rs6000_expand_vector_extract): Ditto. + (rs6000_emit_move): Ditto. + (bdesc_3arg): Ditto. + (bdesc_2arg): Ditto. + (bdesc_1arg): Ditto. + (rs6000_expand_ternop_builtin): Ditto. + (altivec_expand_builtin): Ditto. + (rs6000_expand_unop_builtin): Ditto. + (rs6000_init_builtins): Ditto. 
+ (altivec_init_builtins): Ditto. + (builtin_function_type): Ditto. + (rs6000_common_init_builtins): Ditto. + (rs6000_handle_altivec_attribute): Ditto. + (rs6000_mangle_type): Ditto. + (rs6000_vector_mode_supported_p): Ditto. + (rs6000_mode_dependent_address): Altivec addresses with AND -16 + are mode dependent. + + * config/rs6000/vsx.md: New file for VSX support. + + * config/rs6000/rs6000.h (EASY_VECTOR_MSB): New macro for + identifying values with just the most significant bit set. + (enum rs6000_builtins): Add builtins for VSX. Add simple math + vectorized builtins. + + * config/rs6000/altivec.md (UNSPEC_VRFIP): Delete. + (UNSPEC_VRFIM): Delete. + (splitter for loading up vector with most significant bit): New + splitter for vectorizing copysign. + (altivec_vrfiz): Rename from altivec_ftruncv4sf2. Add support for + vectorizing simple math functions. + (altivec_vrfip): Add support for vectorizing simple math + functions. + (altivec_vrfim): Ditto. + (altivec_copysign_v4sf3): New insn for Altivec copysign support. + + * config/rs6000/rs6000.md (UNSPEC_BPERM): New constant. + (power7.md, vsx.md): Include for power7 support. + (copysigndf3): Use VSX instructions if -mvsx. + (negdf2_fpr): Ditto. + (absdf2_fpr): Ditto. + (nabsdf2_fpr): Ditto. + (adddf3_fpr): Ditto. + (subdf3_fpr): Ditto. + (muldf3_fpr): Ditto. + (divdf3_fpr): Ditto. + (fix_truncdfdi2_fpr): Ditto. + (cmpdf_internal1): Ditto. + (fred, fred_fpr): Convert into expander/insn to add VSX support. + (btruncdf2, btruncdf2_fpr): Ditto. + (ceildf2, ceildf2_fpr): Ditto. + (floordf2, floordf2_fpr): Ditto. + (floatdidf2, floatdidf2_fpr): Ditto. + (fmadddf4_fpr): Name insn. Use VSX instructions if -mvsx. + (fmsubdf4_fpr): Ditto. + (fnmadddf4_fpr_1): Ditto. + (fnmadddf4_fpr_2): Ditto. + (fnmsubdf4_fpr_1): Ditto. + (fnmsubdf4_fpr_2): Ditto. + (fixuns_truncdfdi2): Add expander for VSX support. + (fix_truncdfdi2): Ditto. + (fix_truncdfsi2): Ditto. + (ftruncdf2): Ditto. + (btruncsf2): Whitespace. 
+ (movdf_hardfloat32): Add support for VSX registers. + (movdf_softfloat32): Ditto. + (movdf_hardfloat64): Ditto. + (movdf_hardfloat64_mfpgpr): Ditto. + (movdf_softfloat64): Ditto. + (movti splitters): Add check for vector registers supporting + TImode in the future. + (bpermd): Add power7 bpermd instruction. + + * config/rs6000/altivec.h (vec_div): Define if VSX. + (vec_mul): Ditto. + (vec_msub): Ditto. + (vec_nmadd): Ditto. + (vec_nearbyint): Ditto. + (vec_rint): Ditto. + (vec_sqrt): Ditto. + (all predicates): Use the generic builtin function, and not the + V4SF specific function so that the predicates will work with + VSX's V2DF. + (vec_all_*): Ditto. + (vec_any_*): Ditto. + + * doc/extend.texi (PowerPC Altivec/VSX Built-in Functions): + Document new VSX functions and types. + + * doc/invoke.texi (PowerPC options): Document -mpopcntd, -mvsx + switches. + + * doc/md.texi (PowerPC constraints): Document "wd", "wf", "ws", + "wa", and "j" constraints. Modify "v" to talk about Altivec + instead of just vector. + 2009-07-30 Andrew MacLeod PR debug/26475 diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 7b39799..bc4f30f 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -306,6 +306,17 @@ #define vec_splats __builtin_vec_splats #define vec_promote __builtin_vec_promote +#ifdef __VSX__ +/* VSX additions */ +#define vec_div __builtin_vec_div +#define vec_mul __builtin_vec_mul +#define vec_msub __builtin_vec_msub +#define vec_nmadd __builtin_vec_nmadd +#define vec_nearbyint __builtin_vec_nearbyint +#define vec_rint __builtin_vec_rint +#define vec_sqrt __builtin_vec_sqrt +#endif + /* Predicates. For C++, we use templates in order to allow non-parenthesized arguments. 
For C, instead, we use macros since non-parenthesized arguments were @@ -356,14 +367,14 @@ __altivec_scalar_pred(vec_any_out, __builtin_altivec_vcmpbfp_p (__CR6_EQ_REV, a1, a2)) __altivec_unary_pred(vec_all_nan, - __builtin_altivec_vcmpeqfp_p (__CR6_EQ, a1, a1)) + __builtin_altivec_vcmpeq_p (__CR6_EQ, a1, a1)) __altivec_unary_pred(vec_any_nan, - __builtin_altivec_vcmpeqfp_p (__CR6_LT_REV, a1, a1)) + __builtin_altivec_vcmpeq_p (__CR6_LT_REV, a1, a1)) __altivec_unary_pred(vec_all_numeric, - __builtin_altivec_vcmpeqfp_p (__CR6_LT, a1, a1)) + __builtin_altivec_vcmpeq_p (__CR6_LT, a1, a1)) __altivec_unary_pred(vec_any_numeric, - __builtin_altivec_vcmpeqfp_p (__CR6_EQ_REV, a1, a1)) + __builtin_altivec_vcmpeq_p (__CR6_EQ_REV, a1, a1)) __altivec_scalar_pred(vec_all_eq, __builtin_vec_vcmpeq_p (__CR6_LT, a1, a2)) @@ -384,13 +395,13 @@ __altivec_scalar_pred(vec_any_lt, __builtin_vec_vcmpgt_p (__CR6_EQ_REV, a2, a1)) __altivec_scalar_pred(vec_all_ngt, - __builtin_altivec_vcmpgtfp_p (__CR6_EQ, a1, a2)) + __builtin_altivec_vcmpgt_p (__CR6_EQ, a1, a2)) __altivec_scalar_pred(vec_all_nlt, - __builtin_altivec_vcmpgtfp_p (__CR6_EQ, a2, a1)) + __builtin_altivec_vcmpgt_p (__CR6_EQ, a2, a1)) __altivec_scalar_pred(vec_any_ngt, - __builtin_altivec_vcmpgtfp_p (__CR6_LT_REV, a1, a2)) + __builtin_altivec_vcmpgt_p (__CR6_LT_REV, a1, a2)) __altivec_scalar_pred(vec_any_nlt, - __builtin_altivec_vcmpgtfp_p (__CR6_LT_REV, a2, a1)) + __builtin_altivec_vcmpgt_p (__CR6_LT_REV, a2, a1)) /* __builtin_vec_vcmpge_p is vcmpgefp for floating-point vector types, while for integer types it is converted to __builtin_vec_vcmpgt_p, @@ -405,13 +416,13 @@ __altivec_scalar_pred(vec_any_ge, __builtin_vec_vcmpge_p (__CR6_EQ_REV, a1, a2)) __altivec_scalar_pred(vec_all_nge, - __builtin_altivec_vcmpgefp_p (__CR6_EQ, a1, a2)) + __builtin_altivec_vcmpge_p (__CR6_EQ, a1, a2)) __altivec_scalar_pred(vec_all_nle, - __builtin_altivec_vcmpgefp_p (__CR6_EQ, a2, a1)) + __builtin_altivec_vcmpge_p (__CR6_EQ, a2, a1)) 
__altivec_scalar_pred(vec_any_nge, - __builtin_altivec_vcmpgefp_p (__CR6_LT_REV, a1, a2)) + __builtin_altivec_vcmpge_p (__CR6_LT_REV, a1, a2)) __altivec_scalar_pred(vec_any_nle, - __builtin_altivec_vcmpgefp_p (__CR6_LT_REV, a2, a1)) + __builtin_altivec_vcmpge_p (__CR6_LT_REV, a2, a1)) #undef __altivec_scalar_pred #undef __altivec_unary_pred @@ -423,11 +434,11 @@ __altivec_scalar_pred(vec_any_nle, #define vec_all_in(a1, a2) __builtin_altivec_vcmpbfp_p (__CR6_EQ, (a1), (a2)) #define vec_any_out(a1, a2) __builtin_altivec_vcmpbfp_p (__CR6_EQ_REV, (a1), (a2)) -#define vec_all_nan(a1) __builtin_altivec_vcmpeqfp_p (__CR6_EQ, (a1), (a1)) -#define vec_any_nan(a1) __builtin_altivec_vcmpeqfp_p (__CR6_LT_REV, (a1), (a1)) +#define vec_all_nan(a1) __builtin_vec_vcmpeq_p (__CR6_EQ, (a1), (a1)) +#define vec_any_nan(a1) __builtin_vec_vcmpeq_p (__CR6_LT_REV, (a1), (a1)) -#define vec_all_numeric(a1) __builtin_altivec_vcmpeqfp_p (__CR6_LT, (a1), (a1)) -#define vec_any_numeric(a1) __builtin_altivec_vcmpeqfp_p (__CR6_EQ_REV, (a1), (a1)) +#define vec_all_numeric(a1) __builtin_vec_vcmpeq_p (__CR6_LT, (a1), (a1)) +#define vec_any_numeric(a1) __builtin_vec_vcmpeq_p (__CR6_EQ_REV, (a1), (a1)) #define vec_all_eq(a1, a2) __builtin_vec_vcmpeq_p (__CR6_LT, (a1), (a2)) #define vec_all_ne(a1, a2) __builtin_vec_vcmpeq_p (__CR6_EQ, (a1), (a2)) @@ -439,10 +450,10 @@ __altivec_scalar_pred(vec_any_nle, #define vec_any_gt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ_REV, (a1), (a2)) #define vec_any_lt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ_REV, (a2), (a1)) -#define vec_all_ngt(a1, a2) __builtin_altivec_vcmpgtfp_p (__CR6_EQ, (a1), (a2)) -#define vec_all_nlt(a1, a2) __builtin_altivec_vcmpgtfp_p (__CR6_EQ, (a2), (a1)) -#define vec_any_ngt(a1, a2) __builtin_altivec_vcmpgtfp_p (__CR6_LT_REV, (a1), (a2)) -#define vec_any_nlt(a1, a2) __builtin_altivec_vcmpgtfp_p (__CR6_LT_REV, (a2), (a1)) +#define vec_all_ngt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ, (a1), (a2)) +#define vec_all_nlt(a1, a2) 
__builtin_vec_vcmpgt_p (__CR6_EQ, (a2), (a1)) +#define vec_any_ngt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT_REV, (a1), (a2)) +#define vec_any_nlt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT_REV, (a2), (a1)) /* __builtin_vec_vcmpge_p is vcmpgefp for floating-point vector types, while for integer types it is converted to __builtin_vec_vcmpgt_p, @@ -452,10 +463,10 @@ __altivec_scalar_pred(vec_any_nle, #define vec_any_le(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ_REV, (a2), (a1)) #define vec_any_ge(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ_REV, (a1), (a2)) -#define vec_all_nge(a1, a2) __builtin_altivec_vcmpgefp_p (__CR6_EQ, (a1), (a2)) -#define vec_all_nle(a1, a2) __builtin_altivec_vcmpgefp_p (__CR6_EQ, (a2), (a1)) -#define vec_any_nge(a1, a2) __builtin_altivec_vcmpgefp_p (__CR6_LT_REV, (a1), (a2)) -#define vec_any_nle(a1, a2) __builtin_altivec_vcmpgefp_p (__CR6_LT_REV, (a2), (a1)) +#define vec_all_nge(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ, (a1), (a2)) +#define vec_all_nle(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ, (a2), (a1)) +#define vec_any_nge(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT_REV, (a1), (a2)) +#define vec_any_nle(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT_REV, (a2), (a1)) #endif /* These do not accept vectors, so they do not have a __builtin_vec_* diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 58af47c..53b1054 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -20,8 +20,8 @@ ;; . 
(define_constants - [(UNSPEC_VCMPBFP 50) ;; 51-62 deleted + [(UNSPEC_VCMPBFP 64) (UNSPEC_VMSUMU 65) (UNSPEC_VMSUMM 66) (UNSPEC_VMSUMSHM 68) @@ -66,9 +66,9 @@ (UNSPEC_VSUMSWS 135) (UNSPEC_VPERM 144) (UNSPEC_VPERM_UNS 145) - (UNSPEC_VRFIP 148) + ;; 148 deleted (UNSPEC_VRFIN 149) - (UNSPEC_VRFIM 150) + ;; 150 deleted (UNSPEC_VCFUX 151) (UNSPEC_VCFSX 152) (UNSPEC_VCTUXS 153) @@ -220,6 +220,35 @@ } [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")]) +;; Load up a vector with the most significant bit set by loading up -1 and +;; doing a shift left +(define_split + [(set (match_operand:VM 0 "altivec_register_operand" "") + (match_operand:VM 1 "easy_vector_constant_msb" ""))] + "VECTOR_UNIT_ALTIVEC_P (mode) && reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + enum machine_mode mode = GET_MODE (operands[0]); + rtvec v; + int i, num_elements; + + if (mode == V4SFmode) + { + mode = V4SImode; + dest = gen_lowpart (V4SImode, dest); + } + + num_elements = GET_MODE_NUNITS (mode); + v = rtvec_alloc (num_elements); + for (i = 0; i < num_elements; i++) + RTVEC_ELT (v, i) = constm1_rtx; + + emit_insn (gen_vec_initv4si (dest, gen_rtx_PARALLEL (mode, v))); + emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_ASHIFT (mode, dest, dest))); + DONE; +}) + (define_split [(set (match_operand:VM 0 "altivec_register_operand" "") (match_operand:VM 1 "easy_vector_constant_add_self" ""))] @@ -1310,7 +1339,7 @@ "vspltis %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "*altivec_ftruncv4sf2" +(define_insn "*altivec_vrfiz" [(set (match_operand:V4SF 0 "register_operand" "=v") (fix:V4SF (match_operand:V4SF 1 "register_operand" "v")))] "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" @@ -1337,10 +1366,10 @@ "vperm %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vrfip" +(define_insn "altivec_vrfip" ; ceil [(set (match_operand:V4SF 0 "register_operand" "=v") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] - UNSPEC_VRFIP))] + UNSPEC_FRIP))] 
"TARGET_ALTIVEC" "vrfip %0,%1" [(set_attr "type" "vecfloat")]) @@ -1353,10 +1382,10 @@ "vrfin %0,%1" [(set_attr "type" "vecfloat")]) -(define_insn "altivec_vrfim" +(define_insn "*altivec_vrfim" ; floor [(set (match_operand:V4SF 0 "register_operand" "=v") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] - UNSPEC_VRFIM))] + UNSPEC_FRIM))] "TARGET_ALTIVEC" "vrfim %0,%1" [(set_attr "type" "vecfloat")]) @@ -1431,6 +1460,28 @@ "vrefp %0,%1" [(set_attr "type" "vecfloat")]) +(define_expand "altivec_copysign_v4sf3" + [(use (match_operand:V4SF 0 "register_operand" "")) + (use (match_operand:V4SF 1 "register_operand" "")) + (use (match_operand:V4SF 2 "register_operand" ""))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + " +{ + rtx mask = gen_reg_rtx (V4SImode); + rtvec v = rtvec_alloc (4); + unsigned HOST_WIDE_INT mask_val = ((unsigned HOST_WIDE_INT)1) << 31; + + RTVEC_ELT (v, 0) = GEN_INT (mask_val); + RTVEC_ELT (v, 1) = GEN_INT (mask_val); + RTVEC_ELT (v, 2) = GEN_INT (mask_val); + RTVEC_ELT (v, 3) = GEN_INT (mask_val); + + emit_insn (gen_vec_initv4si (mask, gen_rtx_PARALLEL (V4SImode, v))); + emit_insn (gen_vector_select_v4sf (operands[0], operands[1], operands[2], + gen_lowpart (V4SFmode, mask))); + DONE; +}") + (define_insn "altivec_vsldoi_" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") diff --git a/gcc/config/rs6000/power7.md b/gcc/config/rs6000/power7.md new file mode 100644 index 0000000..3b6a95e --- /dev/null +++ b/gcc/config/rs6000/power7.md @@ -0,0 +1,318 @@ +;; Scheduling description for IBM POWER7 processor. +;; Copyright (C) 2009 Free Software Foundation, Inc. +;; +;; Contributed by Pat Haugen (pthaugen@us.ibm.com). + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "power7iu,power7lsu,power7vsu,power7misc") + +(define_cpu_unit "iu1_power7,iu2_power7" "power7iu") +(define_cpu_unit "lsu1_power7,lsu2_power7" "power7lsu") +(define_cpu_unit "vsu1_power7,vsu2_power7" "power7vsu") +(define_cpu_unit "bpu_power7,cru_power7" "power7misc") +(define_cpu_unit "du1_power7,du2_power7,du3_power7,du4_power7,du5_power7" + "power7misc") + + +(define_reservation "DU_power7" + "du1_power7|du2_power7|du3_power7|du4_power7") + +(define_reservation "DU2F_power7" + "du1_power7+du2_power7") + +(define_reservation "DU4_power7" + "du1_power7+du2_power7+du3_power7+du4_power7") + +(define_reservation "FXU_power7" + "iu1_power7|iu2_power7") + +(define_reservation "VSU_power7" + "vsu1_power7|vsu2_power7") + +(define_reservation "LSU_power7" + "lsu1_power7|lsu2_power7") + + +; Dispatch slots are allocated in order conforming to program order. 
+(absence_set "du1_power7" "du2_power7,du3_power7,du4_power7,du5_power7") +(absence_set "du2_power7" "du3_power7,du4_power7,du5_power7") +(absence_set "du3_power7" "du4_power7,du5_power7") +(absence_set "du4_power7" "du5_power7") + + +; LS Unit +(define_insn_reservation "power7-load" 2 + (and (eq_attr "type" "load") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7") + +(define_insn_reservation "power7-load-ext" 3 + (and (eq_attr "type" "load_ext") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7,FXU_power7") + +(define_insn_reservation "power7-load-update" 2 + (and (eq_attr "type" "load_u") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7+FXU_power7") + +(define_insn_reservation "power7-load-update-indexed" 3 + (and (eq_attr "type" "load_ux") + (eq_attr "cpu" "power7")) + "DU4_power7,FXU_power7,LSU_power7+FXU_power7") + +(define_insn_reservation "power7-load-ext-update" 4 + (and (eq_attr "type" "load_ext_u") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7+FXU_power7,FXU_power7") + +(define_insn_reservation "power7-load-ext-update-indexed" 4 + (and (eq_attr "type" "load_ext_ux") + (eq_attr "cpu" "power7")) + "DU4_power7,FXU_power7,LSU_power7+FXU_power7,FXU_power7") + +(define_insn_reservation "power7-fpload" 3 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7") + +(define_insn_reservation "power7-fpload-update" 3 + (and (eq_attr "type" "fpload_u,fpload_ux") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7+FXU_power7") + +(define_insn_reservation "power7-store" 6 ; store-forwarding latency + (and (eq_attr "type" "store") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7+FXU_power7") + +(define_insn_reservation "power7-store-update" 6 + (and (eq_attr "type" "store_u") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7+FXU_power7,FXU_power7") + +(define_insn_reservation "power7-store-update-indexed" 6 + (and (eq_attr "type" "store_ux") + (eq_attr "cpu" "power7")) + 
"DU4_power7,LSU_power7+FXU_power7,FXU_power7") + +(define_insn_reservation "power7-fpstore" 6 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7+VSU_power7") + +(define_insn_reservation "power7-fpstore-update" 6 + (and (eq_attr "type" "fpstore_u,fpstore_ux") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7+VSU_power7+FXU_power7") + +(define_insn_reservation "power7-larx" 3 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power7")) + "DU4_power7,LSU_power7") + +(define_insn_reservation "power7-stcx" 10 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power7")) + "DU4_power7,LSU_power7") + +(define_insn_reservation "power7-vecload" 3 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7") + +(define_insn_reservation "power7-vecstore" 6 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7+VSU_power7") + +(define_insn_reservation "power7-sync" 11 + (and (eq_attr "type" "sync") + (eq_attr "cpu" "power7")) + "DU4_power7,LSU_power7") + + +; FX Unit +(define_insn_reservation "power7-integer" 1 + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,exts") + (eq_attr "cpu" "power7")) + "DU_power7,FXU_power7") + +(define_insn_reservation "power7-cntlz" 2 + (and (eq_attr "type" "cntlz") + (eq_attr "cpu" "power7")) + "DU_power7,FXU_power7") + +(define_insn_reservation "power7-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power7")) + "DU_power7+DU_power7,FXU_power7,FXU_power7") + +(define_insn_reservation "power7-three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power7")) + "DU_power7+DU_power7+DU_power7,FXU_power7,FXU_power7,FXU_power7") + +(define_insn_reservation "power7-cmp" 1 + (and (eq_attr "type" "cmp,fast_compare") + (eq_attr "cpu" "power7")) + "DU_power7,FXU_power7") + +(define_insn_reservation "power7-compare" 2 + (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare") + (eq_attr "cpu" "power7")) + 
"DU2F_power7,FXU_power7,FXU_power7") + +(define_bypass 3 "power7-cmp,power7-compare" "power7-crlogical,power7-delayedcr") + +(define_insn_reservation "power7-mul" 4 + (and (eq_attr "type" "imul,imul2,imul3,lmul") + (eq_attr "cpu" "power7")) + "DU_power7,FXU_power7") + +(define_insn_reservation "power7-mul-compare" 5 + (and (eq_attr "type" "imul_compare,lmul_compare") + (eq_attr "cpu" "power7")) + "DU2F_power7,FXU_power7,nothing*3,FXU_power7") + +(define_insn_reservation "power7-idiv" 36 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "power7")) + "DU2F_power7,iu1_power7*36|iu2_power7*36") + +(define_insn_reservation "power7-ldiv" 68 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "power7")) + "DU2F_power7,iu1_power7*68|iu2_power7*68") + +(define_insn_reservation "power7-isync" 1 ; + (and (eq_attr "type" "isync") + (eq_attr "cpu" "power7")) + "DU4_power7,FXU_power7") + + +; CR Unit +(define_insn_reservation "power7-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power7")) + "du1_power7,FXU_power7") + +(define_insn_reservation "power7-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7+FXU_power7") + +(define_insn_reservation "power7-crlogical" 3 + (and (eq_attr "type" "cr_logical") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7") + +(define_insn_reservation "power7-delayedcr" 3 + (and (eq_attr "type" "delayed_cr") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7") + +(define_insn_reservation "power7-mfcr" 6 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7") + +(define_insn_reservation "power7-mfcrf" 3 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7") + +(define_insn_reservation "power7-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power7")) + "DU4_power7,cru_power7+FXU_power7") + + +; BR Unit +; Branches take dispatch Slot 4. 
The presence_sets prevent other insn from +; grabbing previous dispatch slots once this is assigned. +(define_insn_reservation "power7-branch" 3 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power7")) + "(du5_power7\ + |du4_power7+du5_power7\ + |du3_power7+du4_power7+du5_power7\ + |du2_power7+du3_power7+du4_power7+du5_power7\ + |du1_power7+du2_power7+du3_power7+du4_power7+du5_power7),bpu_power7") + + +; VS Unit (includes FP/VSX/VMX/DFP) +(define_insn_reservation "power7-fp" 6 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_bypass 8 "power7-fp" "power7-branch") + +(define_insn_reservation "power7-fpcompare" 4 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-sdiv" 26 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-ddiv" 32 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-sqrt" 31 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-dsqrt" 43 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-vecsimple" 2 + (and (eq_attr "type" "vecsimple") + (eq_attr "cpu" "power7")) + "du1_power7,VSU_power7") + +(define_insn_reservation "power7-veccmp" 7 + (and (eq_attr "type" "veccmp") + (eq_attr "cpu" "power7")) + "du1_power7,VSU_power7") + +(define_insn_reservation "power7-vecfloat" 7 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "power7")) + "du1_power7,VSU_power7") + +(define_bypass 6 "power7-vecfloat" "power7-vecfloat") + +(define_insn_reservation "power7-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power7")) + "du1_power7,VSU_power7") + +(define_insn_reservation "power7-vecperm" 3 + (and (eq_attr "type" "vecperm") + (eq_attr 
"cpu" "power7")) + "du2_power7,VSU_power7") diff --git a/gcc/config/rs6000/ppc-asm.h b/gcc/config/rs6000/ppc-asm.h index 147f109..c963eb9 100644 --- a/gcc/config/rs6000/ppc-asm.h +++ b/gcc/config/rs6000/ppc-asm.h @@ -87,7 +87,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define f16 16 #define f17 17 #define f18 18 -#define f19 19 +#define f19 19 #define f20 20 #define f21 21 #define f22 22 @@ -101,6 +101,143 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define f30 30 #define f31 31 +#ifdef __VSX__ +#define f32 32 +#define f33 33 +#define f34 34 +#define f35 35 +#define f36 36 +#define f37 37 +#define f38 38 +#define f39 39 +#define f40 40 +#define f41 41 +#define f42 42 +#define f43 43 +#define f44 44 +#define f45 45 +#define f46 46 +#define f47 47 +#define f48 48 +#define f49 49 +#define f50 30 +#define f51 51 +#define f52 52 +#define f53 53 +#define f54 54 +#define f55 55 +#define f56 56 +#define f57 57 +#define f58 58 +#define f59 59 +#define f60 60 +#define f61 61 +#define f62 62 +#define f63 63 +#endif + +#ifdef __ALTIVEC__ +#define v0 0 +#define v1 1 +#define v2 2 +#define v3 3 +#define v4 4 +#define v5 5 +#define v6 6 +#define v7 7 +#define v8 8 +#define v9 9 +#define v10 10 +#define v11 11 +#define v12 12 +#define v13 13 +#define v14 14 +#define v15 15 +#define v16 16 +#define v17 17 +#define v18 18 +#define v19 19 +#define v20 20 +#define v21 21 +#define v22 22 +#define v23 23 +#define v24 24 +#define v25 25 +#define v26 26 +#define v27 27 +#define v28 28 +#define v29 29 +#define v30 30 +#define v31 31 +#endif + +#ifdef __VSX__ +#define vs0 0 +#define vs1 1 +#define vs2 2 +#define vs3 3 +#define vs4 4 +#define vs5 5 +#define vs6 6 +#define vs7 7 +#define vs8 8 +#define vs9 9 +#define vs10 10 +#define vs11 11 +#define vs12 12 +#define vs13 13 +#define vs14 14 +#define vs15 15 +#define vs16 16 +#define vs17 17 +#define vs18 18 +#define vs19 19 +#define vs20 20 +#define vs21 21 +#define vs22 22 +#define 
vs23 23 +#define vs24 24 +#define vs25 25 +#define vs26 26 +#define vs27 27 +#define vs28 28 +#define vs29 29 +#define vs30 30 +#define vs31 31 +#define vs32 32 +#define vs33 33 +#define vs34 34 +#define vs35 35 +#define vs36 36 +#define vs37 37 +#define vs38 38 +#define vs39 39 +#define vs40 40 +#define vs41 41 +#define vs42 42 +#define vs43 43 +#define vs44 44 +#define vs45 45 +#define vs46 46 +#define vs47 47 +#define vs48 48 +#define vs49 49 +#define vs50 50 +#define vs51 51 +#define vs52 52 +#define vs53 53 +#define vs54 54 +#define vs55 55 +#define vs56 56 +#define vs57 57 +#define vs58 58 +#define vs59 59 +#define vs60 60 +#define vs61 61 +#define vs62 62 +#define vs63 63 +#endif + /* * Macros to glue together two tokens. */ diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 3e5c1a1..cf25cb7 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -377,6 +377,16 @@ return EASY_VECTOR_15_ADD_SELF (val); }) +;; Same as easy_vector_constant but only for EASY_VECTOR_MSB. +(define_predicate "easy_vector_constant_msb" + (and (match_code "const_vector") + (and (match_test "TARGET_ALTIVEC") + (match_test "easy_altivec_constant (op, mode)"))) +{ + HOST_WIDE_INT val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1); + return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode)); +}) + ;; Return 1 if operand is constant zero (scalars and vectors).
(define_predicate "zero_constant" (and (match_code "const_int,const_double,const_vector") diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 3b3ba96..9435452 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -214,7 +214,8 @@ rs6000_macro_to_expand (cpp_reader *pfile, const cpp_token *tok) if (rid_code == RID_UNSIGNED || rid_code == RID_LONG || rid_code == RID_SHORT || rid_code == RID_SIGNED || rid_code == RID_INT || rid_code == RID_CHAR - || rid_code == RID_FLOAT) + || rid_code == RID_FLOAT + || (rid_code == RID_DOUBLE && TARGET_VSX)) { expand_this = C_CPP_HASHNODE (__vector_keyword); /* If the next keyword is bool or pixel, it @@ -329,7 +330,42 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile) if (TARGET_NO_LWSYNC) builtin_define ("__NO_LWSYNC__"); if (TARGET_VSX) - builtin_define ("__VSX__"); + { + builtin_define ("__VSX__"); + + /* For the VSX builtin functions identical to Altivec functions, just map + the altivec builtin into the vsx version (the altivec functions + generate VSX code if -mvsx). */ + builtin_define ("__builtin_vsx_xxland=__builtin_vec_and"); + builtin_define ("__builtin_vsx_xxlandc=__builtin_vec_andc"); + builtin_define ("__builtin_vsx_xxlnor=__builtin_vec_nor"); + builtin_define ("__builtin_vsx_xxlor=__builtin_vec_or"); + builtin_define ("__builtin_vsx_xxlxor=__builtin_vec_xor"); + builtin_define ("__builtin_vsx_xxsel=__builtin_vec_sel"); + builtin_define ("__builtin_vsx_vperm=__builtin_vec_perm"); + + /* Also map the a and m versions of the multiply/add instructions to the + builtin for people blindly going off the instruction manual. 
*/ + builtin_define ("__builtin_vsx_xvmaddadp=__builtin_vsx_xvmadddp"); + builtin_define ("__builtin_vsx_xvmaddmdp=__builtin_vsx_xvmadddp"); + builtin_define ("__builtin_vsx_xvmaddasp=__builtin_vsx_xvmaddsp"); + builtin_define ("__builtin_vsx_xvmaddmsp=__builtin_vsx_xvmaddsp"); + builtin_define ("__builtin_vsx_xvmsubadp=__builtin_vsx_xvmsubdp"); + builtin_define ("__builtin_vsx_xvmsubmdp=__builtin_vsx_xvmsubdp"); + builtin_define ("__builtin_vsx_xvmsubasp=__builtin_vsx_xvmsubsp"); + builtin_define ("__builtin_vsx_xvmsubmsp=__builtin_vsx_xvmsubsp"); + builtin_define ("__builtin_vsx_xvnmaddadp=__builtin_vsx_xvnmadddp"); + builtin_define ("__builtin_vsx_xvnmaddmdp=__builtin_vsx_xvnmadddp"); + builtin_define ("__builtin_vsx_xvnmaddasp=__builtin_vsx_xvnmaddsp"); + builtin_define ("__builtin_vsx_xvnmaddmsp=__builtin_vsx_xvnmaddsp"); + builtin_define ("__builtin_vsx_xvnmsubadp=__builtin_vsx_xvnmsubdp"); + builtin_define ("__builtin_vsx_xvnmsubmdp=__builtin_vsx_xvnmsubdp"); + builtin_define ("__builtin_vsx_xvnmsubasp=__builtin_vsx_xvnmsubsp"); + builtin_define ("__builtin_vsx_xvnmsubmsp=__builtin_vsx_xvnmsubsp"); + } + + /* Tell users they can use __builtin_bswap{16,64}. */ + builtin_define ("__HAVE_BSWAP__"); /* May be overridden by target configuration. */ RS6000_CPU_CPP_ENDIAN_BUILTINS(); @@ -393,7 +429,7 @@ struct altivec_builtin_types }; const struct altivec_builtin_types altivec_overloaded_builtins[] = { - /* Unary AltiVec builtins. */ + /* Unary AltiVec/VSX builtins. 
*/ { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V8HI, @@ -402,6 +438,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V8HI, @@ -410,8 +448,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_CEIL, ALTIVEC_BUILTIN_VRFIP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_CEIL, VSX_BUILTIN_XVRDPIP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_EXPTE, ALTIVEC_BUILTIN_VEXPTEFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_FLOOR, VSX_BUILTIN_XVRDPIM, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_FLOOR, ALTIVEC_BUILTIN_VRFIM, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_LOGE, ALTIVEC_BUILTIN_VLOGEFP, @@ -444,6 +486,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_TRUNC, ALTIVEC_BUILTIN_VRFIZ, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_TRUNC, VSX_BUILTIN_XVRDPIZ, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSB, RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSB, @@ -489,7 +533,7 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_VUPKLSB, ALTIVEC_BUILTIN_VUPKLSB, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 }, - /* Binary AltiVec builtins. 
*/ + /* Binary AltiVec/VSX builtins. */ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM, @@ -528,6 +572,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_VADDFP, ALTIVEC_BUILTIN_VADDFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, @@ -673,9 +719,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, - RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 }, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, - RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 }, + RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, @@ -727,9 +773,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, - RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 }, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, - RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 }, + RS6000_BTI_V2DF, 
RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, @@ -812,6 +858,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_VCMPEQFP, ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, @@ -832,6 +880,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_CMPGE, ALTIVEC_BUILTIN_VCMPGEFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_XVCMPGEDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSB, @@ -846,6 +896,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_VCMPGTFP, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VCMPGTSW, ALTIVEC_BUILTIN_VCMPGTSW, @@ -874,6 +926,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { 
ALTIVEC_BUILTIN_VEC_CMPLE, ALTIVEC_BUILTIN_VCMPGEFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_XVCMPGEDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSB, @@ -888,6 +942,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_COPYSIGN, VSX_BUILTIN_CPSGNDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_COPYSIGN, ALTIVEC_BUILTIN_COPYSIGN_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFUX, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFSX, @@ -900,6 +960,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTU, ALTIVEC_BUILTIN_VCTUXS, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVSP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX, @@ -1234,6 +1298,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { 
ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_VMAXFP, ALTIVEC_BUILTIN_VMAXFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VMAXSW, ALTIVEC_BUILTIN_VMAXSW, @@ -1410,6 +1476,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_VMINFP, ALTIVEC_BUILTIN_VMINFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VMINSW, ALTIVEC_BUILTIN_VMINSW, @@ -1460,6 +1528,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULSP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB, @@ -1492,6 +1564,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULOUB, ALTIVEC_BUILTIN_VMULOUB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_NEARBYINT, VSX_BUILTIN_XVRDPI, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 
}, + { ALTIVEC_BUILTIN_VEC_NEARBYINT, VSX_BUILTIN_XVRSPI, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, @@ -1523,9 +1599,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, - RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 }, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, - RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 }, + RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, @@ -1622,6 +1698,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_RINT, VSX_BUILTIN_XVRDPIC, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RINT, VSX_BUILTIN_XVRSPIC, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLB, @@ -1658,6 +1738,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { 
ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VSLW, ALTIVEC_BUILTIN_VSLW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSLW, ALTIVEC_BUILTIN_VSLW, @@ -1984,6 +2068,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_VSUBFP, ALTIVEC_BUILTIN_VSUBFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, @@ -2145,9 +2231,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, - RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 }, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, - RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 }, + RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, @@ -2191,7 +2277,7 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, - /* Ternary AltiVec builtins. */ + /* Ternary AltiVec/VSX builtins. 
*/ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, @@ -2354,6 +2440,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMADDFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, { ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM, @@ -2366,6 +2454,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, { ALTIVEC_BUILTIN_VEC_MRADDS, ALTIVEC_BUILTIN_VMHRADDSHS, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { VSX_BUILTIN_VEC_MSUB, VSX_BUILTIN_XVMSUBSP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_MSUB, VSX_BUILTIN_XVMSUBDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMUBM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI }, { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMMBM, @@ -2390,8 +2482,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI }, { ALTIVEC_BUILTIN_VEC_VMSUMUHS, ALTIVEC_BUILTIN_VMSUMUHS, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI }, + { VSX_BUILTIN_VEC_NMADD, VSX_BUILTIN_XVNMADDSP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 
RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_NMADD, VSX_BUILTIN_XVNMADDDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, { ALTIVEC_BUILTIN_VEC_NMSUB, ALTIVEC_BUILTIN_VNMSUBFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_NMSUB, VSX_BUILTIN_XVNMSUBDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, @@ -2812,6 +2910,54 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DI, + 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_NOT_OPAQUE }, /* Predicates. 
*/ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P, @@ -2852,6 +2998,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P, @@ -2900,6 +3048,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, /* cmpge is the same as cmpgt for all cases except floating point. 
@@ -2943,6 +3093,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, { (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 } }; @@ -3064,8 +3216,10 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, const struct altivec_builtin_types *desc; unsigned int n; - if (fcode < ALTIVEC_BUILTIN_OVERLOADED_FIRST - || fcode > ALTIVEC_BUILTIN_OVERLOADED_LAST) + if ((fcode < ALTIVEC_BUILTIN_OVERLOADED_FIRST + || fcode > ALTIVEC_BUILTIN_OVERLOADED_LAST) + && (fcode < VSX_BUILTIN_OVERLOADED_FIRST + || fcode > VSX_BUILTIN_OVERLOADED_LAST)) return NULL_TREE; /* For now treat vec_splats and vec_promote as the same. */ @@ -3105,11 +3259,12 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, && !INTEGRAL_TYPE_P (type)) goto bad; unsigned_p = TYPE_UNSIGNED (type); - if (type == long_long_unsigned_type_node - || type == long_long_integer_type_node) - goto bad; switch (TYPE_MODE (type)) { + case DImode: + type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); + size = 2; + break; case SImode: type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); size = 4; @@ -3123,6 +3278,7 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, size = 16; break; case SFmode: type = V4SF_type_node; size = 4; break; + case DFmode: type = V2DF_type_node; size = 2; break; default: goto bad; } @@ -3139,7 +3295,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, return build_constructor (type, vec); } - /* For now use pointer tricks to do the extaction. */ + /* For now use pointer tricks to do the extraction, unless we are on VSX + extracting a double from a constant offset.
*/ if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT) { tree arg1; @@ -3148,6 +3305,7 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, tree arg1_inner_type; tree decl, stmt; tree innerptrtype; + enum machine_mode mode; /* No second argument. */ if (nargs != 2) @@ -3164,6 +3322,25 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, goto bad; if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) goto bad; + + /* If we can use the VSX xxpermdi instruction, use that for extract. */ + mode = TYPE_MODE (arg1_type); + if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode) + && TREE_CODE (arg2) == INTEGER_CST + && TREE_INT_CST_HIGH (arg2) == 0 + && (TREE_INT_CST_LOW (arg2) == 0 || TREE_INT_CST_LOW (arg2) == 1)) + { + tree call = NULL_TREE; + + if (mode == V2DFmode) + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF]; + else if (mode == V2DImode) + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI]; + + if (call) + return build_call_expr (call, 2, arg1, arg2); + } + /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */ arg1_inner_type = TREE_TYPE (arg1_type); arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2, @@ -3193,7 +3370,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, return stmt; } - /* For now use pointer tricks to do the insertation. */ + /* For now use pointer tricks to do the insertion, unless we are on VSX + inserting a double to a constant offset. */ if (fcode == ALTIVEC_BUILTIN_VEC_INSERT) { tree arg0; @@ -3203,7 +3381,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, tree arg1_inner_type; tree decl, stmt; tree innerptrtype; - + enum machine_mode mode; + /* No second or third arguments. */ if (nargs != 3) { @@ -3220,6 +3399,27 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, goto bad; if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) goto bad; + + /* If we can use the VSX xxpermdi instruction, use that for insert.
*/ + mode = TYPE_MODE (arg1_type); + if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode) + && TREE_CODE (arg2) == INTEGER_CST + && TREE_INT_CST_HIGH (arg2) == 0 + && (TREE_INT_CST_LOW (arg2) == 0 || TREE_INT_CST_LOW (arg2) == 1)) + { + tree call = NULL_TREE; + + if (mode == V2DFmode) + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DF]; + else if (mode == V2DImode) + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DI]; + + /* Note, __builtin_vec_insert_ has vector and scalar types + reversed. */ + if (call) + return build_call_expr (call, 3, arg1, arg0, arg2); + } + /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */ arg1_inner_type = TREE_TYPE (arg1_type); arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index b077c83..25cacc4 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -837,6 +837,7 @@ static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool); static rtx rs6000_make_savres_rtx (rs6000_stack_t *, rtx, int, enum machine_mode, bool, bool, bool); static bool rs6000_reg_live_or_pic_offset_p (int); +static tree rs6000_builtin_vectorized_function (unsigned int, tree, tree); static int rs6000_savres_strategy (rs6000_stack_t *, bool, int, int); static void rs6000_restore_saved_cr (rtx, int); static void rs6000_output_function_prologue (FILE *, HOST_WIDE_INT); @@ -1395,6 +1396,10 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_HANDLE_OPTION #define TARGET_HANDLE_OPTION rs6000_handle_option +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + rs6000_builtin_vectorized_function + #undef TARGET_DEFAULT_TARGET_FLAGS #define TARGET_DEFAULT_TARGET_FLAGS \ (TARGET_DEFAULT) @@ -1871,20 +1876,14 @@ rs6000_init_hard_regno_mode_ok (void) } } - /* V2DImode, prefer vsx over altivec, since the main use will be for - vectorized floating point 
conversions. */ + /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract. + Altivec doesn't have 64-bit support. */ if (TARGET_VSX) { rs6000_vector_mem[V2DImode] = VECTOR_VSX; rs6000_vector_unit[V2DImode] = VECTOR_NONE; rs6000_vector_align[V2DImode] = align64; } - else if (TARGET_ALTIVEC) - { - rs6000_vector_mem[V2DImode] = VECTOR_ALTIVEC; - rs6000_vector_unit[V2DImode] = VECTOR_NONE; - rs6000_vector_align[V2DImode] = align64; - } /* DFmode, see if we want to use the VSX unit. */ if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE) @@ -2169,7 +2168,7 @@ rs6000_override_options (const char *default_cpu) {"power7", PROCESSOR_POWER7, POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - /* | MASK_VSX */}, /* Don't add MASK_ISEL by default */ + | MASK_VSX}, /* Don't add MASK_ISEL by default */ {"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK}, {"powerpc64", PROCESSOR_POWERPC64, POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64}, @@ -2765,6 +2764,14 @@ rs6000_builtin_conversion (unsigned int tcode, tree type) case FIX_TRUNC_EXPR: switch (TYPE_MODE (type)) { + case V2DImode: + if (!VECTOR_UNIT_VSX_P (V2DFmode)) + return NULL_TREE; + + return TYPE_UNSIGNED (type) + ? rs6000_builtin_decls[VSX_BUILTIN_XVCVDPUXDS_UNS] + : rs6000_builtin_decls[VSX_BUILTIN_XVCVDPSXDS]; + case V4SImode: if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode)) return NULL_TREE; @@ -2780,6 +2787,14 @@ rs6000_builtin_conversion (unsigned int tcode, tree type) case FLOAT_EXPR: switch (TYPE_MODE (type)) { + case V2DImode: + if (!VECTOR_UNIT_VSX_P (V2DFmode)) + return NULL_TREE; + + return TYPE_UNSIGNED (type) + ? 
rs6000_builtin_decls[VSX_BUILTIN_XVCVUXDDP] + : rs6000_builtin_decls[VSX_BUILTIN_XVCVSXDDP]; + case V4SImode: if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode)) return NULL_TREE; @@ -2908,6 +2923,22 @@ rs6000_builtin_vec_perm (tree type, tree *mask_element_type) d = rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_4SF]; break; + case V2DFmode: + if (!TARGET_ALLOW_DF_PERMUTE) + return NULL_TREE; + + d = rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_2DF]; + break; + + case V2DImode: + if (!TARGET_ALLOW_DF_PERMUTE) + return NULL_TREE; + + d = (uns_p + ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_2DI_UNS] + : rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_2DI]); + break; + default: return NULL_TREE; } @@ -2981,6 +3012,136 @@ rs6000_parse_fpu_option (const char *option) return FPU_NONE; } +/* Returns a function decl for a vectorized version of the builtin function + with code FN, result vector type TYPE_OUT and argument vector type TYPE_IN, + or NULL_TREE if it is not available. */ + +static tree +rs6000_builtin_vectorized_function (unsigned int fn, tree type_out, + tree type_in) +{ + enum machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE + || !TARGET_VECTORIZE_BUILTINS) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + + switch (fn) + { + case BUILT_IN_COPYSIGN: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP]; + break; + case BUILT_IN_COPYSIGNF: + if (out_mode != SFmode || out_n != 4 + || in_mode != SFmode || in_n != 4) + break; + if (VECTOR_UNIT_VSX_P (V4SFmode)) + return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP]; + if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF]; + break; + case
BUILT_IN_SQRT: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP]; + break; + case BUILT_IN_SQRTF: + if (VECTOR_UNIT_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP]; + break; + case BUILT_IN_CEIL: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP]; + break; + case BUILT_IN_CEILF: + if (out_mode != SFmode || out_n != 4 + || in_mode != SFmode || in_n != 4) + break; + if (VECTOR_UNIT_VSX_P (V4SFmode)) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP]; + if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP]; + break; + case BUILT_IN_FLOOR: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM]; + break; + case BUILT_IN_FLOORF: + if (out_mode != SFmode || out_n != 4 + || in_mode != SFmode || in_n != 4) + break; + if (VECTOR_UNIT_VSX_P (V4SFmode)) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM]; + if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM]; + break; + case BUILT_IN_TRUNC: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ]; + break; + case BUILT_IN_TRUNCF: + if (out_mode != SFmode || out_n != 4 + || in_mode != SFmode || in_n != 4) + break; + if (VECTOR_UNIT_VSX_P (V4SFmode)) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ]; + if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ]; + break; + case BUILT_IN_NEARBYINT: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && flag_unsafe_math_optimizations + && out_mode == DFmode && out_n == 2 + 
&& in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI]; + break; + case BUILT_IN_NEARBYINTF: + if (VECTOR_UNIT_VSX_P (V4SFmode) + && flag_unsafe_math_optimizations + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI]; + break; + case BUILT_IN_RINT: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && !flag_trapping_math + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC]; + break; + case BUILT_IN_RINTF: + if (VECTOR_UNIT_VSX_P (V4SFmode) + && !flag_trapping_math + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC]; + break; + default: + break; + } + return NULL_TREE; +} + + /* Implement TARGET_HANDLE_OPTION. */ static bool @@ -3621,6 +3782,11 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) && (splat_val >= 0 || (step == 1 && copies == 1))) ; + /* Also check if we are loading up the most significant bit, which can be + done by loading up -1 and shifting the value left by -1. */ + else if (EASY_VECTOR_MSB (splat_val, inner)) + ; + else return false; @@ -3971,8 +4137,6 @@ rs6000_expand_vector_init (rtx target, rtx vals) emit_insn (gen_rtx_SET (VOIDmode, target, const_vec)); return; } - else if (all_same && int_vector_p) - ; /* Splat vector element. */ else { /* Load from constant pool. */ @@ -3981,8 +4145,66 @@ rs6000_expand_vector_init (rtx target, rtx vals) } } - /* Store value to stack temp. Load vector element. Splat. */ - if (all_same) + /* Double word values on VSX can use xxpermdi or lxvdsx.
*/ + if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode)) + { + if (all_same) + { + rtx element = XVECEXP (vals, 0, 0); + if (mode == V2DFmode) + emit_insn (gen_vsx_splat_v2df (target, element)); + else + emit_insn (gen_vsx_splat_v2di (target, element)); + } + else + { + rtx op0 = copy_to_reg (XVECEXP (vals, 0, 0)); + rtx op1 = copy_to_reg (XVECEXP (vals, 0, 1)); + if (mode == V2DFmode) + emit_insn (gen_vsx_concat_v2df (target, op0, op1)); + else + emit_insn (gen_vsx_concat_v2di (target, op0, op1)); + } + return; + } + + /* With single precision floating point on VSX, know that internally single + precision is actually represented as a double, and either make 2 V2DF + vectors, and convert these vectors to single precision, or do one + conversion, and splat the result to the other elements. */ + if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode)) + { + if (all_same) + { + rtx freg = gen_reg_rtx (V4SFmode); + rtx sreg = copy_to_reg (XVECEXP (vals, 0, 0)); + + emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg)); + emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx)); + } + else + { + rtx dbl_even = gen_reg_rtx (V2DFmode); + rtx dbl_odd = gen_reg_rtx (V2DFmode); + rtx flt_even = gen_reg_rtx (V4SFmode); + rtx flt_odd = gen_reg_rtx (V4SFmode); + + emit_insn (gen_vsx_concat_v2sf (dbl_even, + copy_to_reg (XVECEXP (vals, 0, 0)), + copy_to_reg (XVECEXP (vals, 0, 1)))); + emit_insn (gen_vsx_concat_v2sf (dbl_odd, + copy_to_reg (XVECEXP (vals, 0, 2)), + copy_to_reg (XVECEXP (vals, 0, 3)))); + emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even)); + emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd)); + emit_insn (gen_vec_extract_evenv4sf (target, flt_even, flt_odd)); + } + return; + } + + /* Store value to stack temp. Load vector element. Splat. However, splat + of 64-bit items is not supported on Altivec. 
*/ + if (all_same && GET_MODE_SIZE (inner_mode) <= 4) { mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode), 0); emit_move_insn (adjust_address_nv (mem, inner_mode, 0), @@ -4040,6 +4262,14 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt) int width = GET_MODE_SIZE (inner_mode); int i; + if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode)) + { + rtx (*set_func) (rtx, rtx, rtx, rtx) + = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di); + emit_insn (set_func (target, target, val, GEN_INT (elt))); + return; + } + /* Load single variable value. */ mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode), 0); emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val); @@ -4077,6 +4307,14 @@ rs6000_expand_vector_extract (rtx target, rtx vec, int elt) enum machine_mode inner_mode = GET_MODE_INNER (mode); rtx mem, x; + if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode)) + { + rtx (*extract_func) (rtx, rtx, rtx) + = ((mode == V2DFmode) ? gen_vsx_extract_v2df : gen_vsx_extract_v2di); + emit_insn (extract_func (target, vec, GEN_INT (elt))); + return; + } + /* Allocate mode-sized buffer. */ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); @@ -5447,6 +5685,10 @@ rs6000_mode_dependent_address (rtx addr) case PRE_MODIFY: return TARGET_UPDATE; + /* AND is only allowed in Altivec loads.
*/ + case AND: + return true; + default: break; } @@ -6048,6 +6290,8 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) case V2SFmode: case V2SImode: case V1DImode: + case V2DFmode: + case V2DImode: if (CONSTANT_P (operands[1]) && !easy_vector_constant (operands[1], mode)) operands[1] = force_const_mem (mode, operands[1]); @@ -8192,6 +8436,59 @@ static const struct builtin_description bdesc_3arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_perm", ALTIVEC_BUILTIN_VEC_PERM }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sel", ALTIVEC_BUILTIN_VEC_SEL }, + { MASK_VSX, CODE_FOR_vsx_fmaddv2df4, "__builtin_vsx_xvmadddp", VSX_BUILTIN_XVMADDDP }, + { MASK_VSX, CODE_FOR_vsx_fmsubv2df4, "__builtin_vsx_xvmsubdp", VSX_BUILTIN_XVMSUBDP }, + { MASK_VSX, CODE_FOR_vsx_fnmaddv2df4, "__builtin_vsx_xvnmadddp", VSX_BUILTIN_XVNMADDDP }, + { MASK_VSX, CODE_FOR_vsx_fnmsubv2df4, "__builtin_vsx_xvnmsubdp", VSX_BUILTIN_XVNMSUBDP }, + + { MASK_VSX, CODE_FOR_vsx_fmaddv4sf4, "__builtin_vsx_xvmaddsp", VSX_BUILTIN_XVMADDSP }, + { MASK_VSX, CODE_FOR_vsx_fmsubv4sf4, "__builtin_vsx_xvmsubsp", VSX_BUILTIN_XVMSUBSP }, + { MASK_VSX, CODE_FOR_vsx_fnmaddv4sf4, "__builtin_vsx_xvnmaddsp", VSX_BUILTIN_XVNMADDSP }, + { MASK_VSX, CODE_FOR_vsx_fnmsubv4sf4, "__builtin_vsx_xvnmsubsp", VSX_BUILTIN_XVNMSUBSP }, + + { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_msub", VSX_BUILTIN_VEC_MSUB }, + { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_nmadd", VSX_BUILTIN_VEC_NMADD }, + + { MASK_VSX, CODE_FOR_vector_select_v2di, "__builtin_vsx_xxsel_2di", VSX_BUILTIN_XXSEL_2DI }, + { MASK_VSX, CODE_FOR_vector_select_v2df, "__builtin_vsx_xxsel_2df", VSX_BUILTIN_XXSEL_2DF }, + { MASK_VSX, CODE_FOR_vector_select_v4sf, "__builtin_vsx_xxsel_4sf", VSX_BUILTIN_XXSEL_4SF }, + { MASK_VSX, CODE_FOR_vector_select_v4si, "__builtin_vsx_xxsel_4si", VSX_BUILTIN_XXSEL_4SI }, + { MASK_VSX, CODE_FOR_vector_select_v8hi, "__builtin_vsx_xxsel_8hi", VSX_BUILTIN_XXSEL_8HI }, + { MASK_VSX, CODE_FOR_vector_select_v16qi, 
"__builtin_vsx_xxsel_16qi", VSX_BUILTIN_XXSEL_16QI }, + { MASK_VSX, CODE_FOR_vector_select_v2di_uns, "__builtin_vsx_xxsel_2di_uns", VSX_BUILTIN_XXSEL_2DI_UNS }, + { MASK_VSX, CODE_FOR_vector_select_v4si_uns, "__builtin_vsx_xxsel_4si_uns", VSX_BUILTIN_XXSEL_4SI_UNS }, + { MASK_VSX, CODE_FOR_vector_select_v8hi_uns, "__builtin_vsx_xxsel_8hi_uns", VSX_BUILTIN_XXSEL_8HI_UNS }, + { MASK_VSX, CODE_FOR_vector_select_v16qi_uns, "__builtin_vsx_xxsel_16qi_uns", VSX_BUILTIN_XXSEL_16QI_UNS }, + + { MASK_VSX, CODE_FOR_altivec_vperm_v2di, "__builtin_vsx_vperm_2di", VSX_BUILTIN_VPERM_2DI }, + { MASK_VSX, CODE_FOR_altivec_vperm_v2df, "__builtin_vsx_vperm_2df", VSX_BUILTIN_VPERM_2DF }, + { MASK_VSX, CODE_FOR_altivec_vperm_v4sf, "__builtin_vsx_vperm_4sf", VSX_BUILTIN_VPERM_4SF }, + { MASK_VSX, CODE_FOR_altivec_vperm_v4si, "__builtin_vsx_vperm_4si", VSX_BUILTIN_VPERM_4SI }, + { MASK_VSX, CODE_FOR_altivec_vperm_v8hi, "__builtin_vsx_vperm_8hi", VSX_BUILTIN_VPERM_8HI }, + { MASK_VSX, CODE_FOR_altivec_vperm_v16qi, "__builtin_vsx_vperm_16qi", VSX_BUILTIN_VPERM_16QI }, + { MASK_VSX, CODE_FOR_altivec_vperm_v2di_uns, "__builtin_vsx_vperm_2di_uns", VSX_BUILTIN_VPERM_2DI_UNS }, + { MASK_VSX, CODE_FOR_altivec_vperm_v4si_uns, "__builtin_vsx_vperm_4si_uns", VSX_BUILTIN_VPERM_4SI_UNS }, + { MASK_VSX, CODE_FOR_altivec_vperm_v8hi_uns, "__builtin_vsx_vperm_8hi_uns", VSX_BUILTIN_VPERM_8HI_UNS }, + { MASK_VSX, CODE_FOR_altivec_vperm_v16qi_uns, "__builtin_vsx_vperm_16qi_uns", VSX_BUILTIN_VPERM_16QI_UNS }, + + { MASK_VSX, CODE_FOR_vsx_xxpermdi_v2df, "__builtin_vsx_xxpermdi_2df", VSX_BUILTIN_XXPERMDI_2DF }, + { MASK_VSX, CODE_FOR_vsx_xxpermdi_v2di, "__builtin_vsx_xxpermdi_2di", VSX_BUILTIN_XXPERMDI_2DI }, + { MASK_VSX, CODE_FOR_vsx_xxpermdi_v4sf, "__builtin_vsx_xxpermdi_4sf", VSX_BUILTIN_XXPERMDI_4SF }, + { MASK_VSX, CODE_FOR_vsx_xxpermdi_v4si, "__builtin_vsx_xxpermdi_4si", VSX_BUILTIN_XXPERMDI_4SI }, + { MASK_VSX, CODE_FOR_vsx_xxpermdi_v8hi, "__builtin_vsx_xxpermdi_8hi", VSX_BUILTIN_XXPERMDI_8HI }, + { 
MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi, "__builtin_vsx_xxpermdi_16qi", VSX_BUILTIN_XXPERMDI_16QI }, + { MASK_VSX, CODE_FOR_nothing, "__builtin_vsx_xxpermdi", VSX_BUILTIN_VEC_XXPERMDI }, + { MASK_VSX, CODE_FOR_vsx_set_v2df, "__builtin_vsx_set_2df", VSX_BUILTIN_SET_2DF }, + { MASK_VSX, CODE_FOR_vsx_set_v2di, "__builtin_vsx_set_2di", VSX_BUILTIN_SET_2DI }, + + { MASK_VSX, CODE_FOR_vsx_xxsldwi_v2di, "__builtin_vsx_xxsldwi_2di", VSX_BUILTIN_XXSLDWI_2DI }, + { MASK_VSX, CODE_FOR_vsx_xxsldwi_v2df, "__builtin_vsx_xxsldwi_2df", VSX_BUILTIN_XXSLDWI_2DF }, + { MASK_VSX, CODE_FOR_vsx_xxsldwi_v4sf, "__builtin_vsx_xxsldwi_4sf", VSX_BUILTIN_XXSLDWI_4SF }, + { MASK_VSX, CODE_FOR_vsx_xxsldwi_v4si, "__builtin_vsx_xxsldwi_4si", VSX_BUILTIN_XXSLDWI_4SI }, + { MASK_VSX, CODE_FOR_vsx_xxsldwi_v8hi, "__builtin_vsx_xxsldwi_8hi", VSX_BUILTIN_XXSLDWI_8HI }, + { MASK_VSX, CODE_FOR_vsx_xxsldwi_v16qi, "__builtin_vsx_xxsldwi_16qi", VSX_BUILTIN_XXSLDWI_16QI }, + { MASK_VSX, CODE_FOR_nothing, "__builtin_vsx_xxsldwi", VSX_BUILTIN_VEC_XXSLDWI }, + { 0, CODE_FOR_paired_msub, "__builtin_paired_msub", PAIRED_BUILTIN_MSUB }, { 0, CODE_FOR_paired_madd, "__builtin_paired_madd", PAIRED_BUILTIN_MADD }, { 0, CODE_FOR_paired_madds0, "__builtin_paired_madds0", PAIRED_BUILTIN_MADDS0 }, @@ -8337,9 +8634,50 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vsum2sws, "__builtin_altivec_vsum2sws", ALTIVEC_BUILTIN_VSUM2SWS }, { MASK_ALTIVEC, CODE_FOR_altivec_vsumsws, "__builtin_altivec_vsumsws", ALTIVEC_BUILTIN_VSUMSWS }, { MASK_ALTIVEC, CODE_FOR_xorv4si3, "__builtin_altivec_vxor", ALTIVEC_BUILTIN_VXOR }, - - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_add", ALTIVEC_BUILTIN_VEC_ADD }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddfp", ALTIVEC_BUILTIN_VEC_VADDFP }, + { MASK_ALTIVEC, CODE_FOR_vector_copysignv4sf3, "__builtin_altivec_copysignfp", ALTIVEC_BUILTIN_COPYSIGN_V4SF }, + + { MASK_VSX, CODE_FOR_addv2df3, "__builtin_vsx_xvadddp", VSX_BUILTIN_XVADDDP }, + { 
MASK_VSX, CODE_FOR_subv2df3, "__builtin_vsx_xvsubdp", VSX_BUILTIN_XVSUBDP }, + { MASK_VSX, CODE_FOR_mulv2df3, "__builtin_vsx_xvmuldp", VSX_BUILTIN_XVMULDP }, + { MASK_VSX, CODE_FOR_divv2df3, "__builtin_vsx_xvdivdp", VSX_BUILTIN_XVDIVDP }, + { MASK_VSX, CODE_FOR_sminv2df3, "__builtin_vsx_xvmindp", VSX_BUILTIN_XVMINDP }, + { MASK_VSX, CODE_FOR_smaxv2df3, "__builtin_vsx_xvmaxdp", VSX_BUILTIN_XVMAXDP }, + { MASK_VSX, CODE_FOR_vsx_tdivv2df3_fe, "__builtin_vsx_xvtdivdp_fe", VSX_BUILTIN_XVTDIVDP_FE }, + { MASK_VSX, CODE_FOR_vsx_tdivv2df3_fg, "__builtin_vsx_xvtdivdp_fg", VSX_BUILTIN_XVTDIVDP_FG }, + { MASK_VSX, CODE_FOR_vector_eqv2df, "__builtin_vsx_xvcmpeqdp", VSX_BUILTIN_XVCMPEQDP }, + { MASK_VSX, CODE_FOR_vector_gtv2df, "__builtin_vsx_xvcmpgtdp", VSX_BUILTIN_XVCMPGTDP }, + { MASK_VSX, CODE_FOR_vector_gev2df, "__builtin_vsx_xvcmpgedp", VSX_BUILTIN_XVCMPGEDP }, + + { MASK_VSX, CODE_FOR_addv4sf3, "__builtin_vsx_xvaddsp", VSX_BUILTIN_XVADDSP }, + { MASK_VSX, CODE_FOR_subv4sf3, "__builtin_vsx_xvsubsp", VSX_BUILTIN_XVSUBSP }, + { MASK_VSX, CODE_FOR_mulv4sf3, "__builtin_vsx_xvmulsp", VSX_BUILTIN_XVMULSP }, + { MASK_VSX, CODE_FOR_divv4sf3, "__builtin_vsx_xvdivsp", VSX_BUILTIN_XVDIVSP }, + { MASK_VSX, CODE_FOR_sminv4sf3, "__builtin_vsx_xvminsp", VSX_BUILTIN_XVMINSP }, + { MASK_VSX, CODE_FOR_smaxv4sf3, "__builtin_vsx_xvmaxsp", VSX_BUILTIN_XVMAXSP }, + { MASK_VSX, CODE_FOR_vsx_tdivv4sf3_fe, "__builtin_vsx_xvtdivsp_fe", VSX_BUILTIN_XVTDIVSP_FE }, + { MASK_VSX, CODE_FOR_vsx_tdivv4sf3_fg, "__builtin_vsx_xvtdivsp_fg", VSX_BUILTIN_XVTDIVSP_FG }, + { MASK_VSX, CODE_FOR_vector_eqv4sf, "__builtin_vsx_xvcmpeqsp", VSX_BUILTIN_XVCMPEQSP }, + { MASK_VSX, CODE_FOR_vector_gtv4sf, "__builtin_vsx_xvcmpgtsp", VSX_BUILTIN_XVCMPGTSP }, + { MASK_VSX, CODE_FOR_vector_gev4sf, "__builtin_vsx_xvcmpgesp", VSX_BUILTIN_XVCMPGESP }, + + { MASK_VSX, CODE_FOR_smindf3, "__builtin_vsx_xsmindp", VSX_BUILTIN_XSMINDP }, + { MASK_VSX, CODE_FOR_smaxdf3, "__builtin_vsx_xsmaxdp", VSX_BUILTIN_XSMAXDP }, + { MASK_VSX, 
CODE_FOR_vsx_tdivdf3_fe, "__builtin_vsx_xstdivdp_fe", VSX_BUILTIN_XSTDIVDP_FE }, + { MASK_VSX, CODE_FOR_vsx_tdivdf3_fg, "__builtin_vsx_xstdivdp_fg", VSX_BUILTIN_XSTDIVDP_FG }, + { MASK_VSX, CODE_FOR_vector_copysignv2df3, "__builtin_vsx_cpsgndp", VSX_BUILTIN_CPSGNDP }, + { MASK_VSX, CODE_FOR_vector_copysignv4sf3, "__builtin_vsx_cpsgnsp", VSX_BUILTIN_CPSGNSP }, + + { MASK_VSX, CODE_FOR_vsx_concat_v2df, "__builtin_vsx_concat_2df", VSX_BUILTIN_CONCAT_2DF }, + { MASK_VSX, CODE_FOR_vsx_concat_v2di, "__builtin_vsx_concat_2di", VSX_BUILTIN_CONCAT_2DI }, + { MASK_VSX, CODE_FOR_vsx_splat_v2df, "__builtin_vsx_splat_2df", VSX_BUILTIN_SPLAT_2DF }, + { MASK_VSX, CODE_FOR_vsx_splat_v2di, "__builtin_vsx_splat_2di", VSX_BUILTIN_SPLAT_2DI }, + { MASK_VSX, CODE_FOR_vsx_xxmrghw_v4sf, "__builtin_vsx_xxmrghw", VSX_BUILTIN_XXMRGHW_4SF }, + { MASK_VSX, CODE_FOR_vsx_xxmrghw_v4si, "__builtin_vsx_xxmrghw_4si", VSX_BUILTIN_XXMRGHW_4SI }, + { MASK_VSX, CODE_FOR_vsx_xxmrglw_v4sf, "__builtin_vsx_xxmrglw", VSX_BUILTIN_XXMRGLW_4SF }, + { MASK_VSX, CODE_FOR_vsx_xxmrglw_v4si, "__builtin_vsx_xxmrglw_4si", VSX_BUILTIN_XXMRGLW_4SI }, + + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_add", ALTIVEC_BUILTIN_VEC_ADD }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vaddfp", ALTIVEC_BUILTIN_VEC_VADDFP }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduwm", ALTIVEC_BUILTIN_VEC_VADDUWM }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduhm", ALTIVEC_BUILTIN_VEC_VADDUHM }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddubm", ALTIVEC_BUILTIN_VEC_VADDUBM }, @@ -8377,6 +8715,7 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtub", ALTIVEC_BUILTIN_VEC_VCMPGTUB }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmple", ALTIVEC_BUILTIN_VEC_CMPLE }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmplt", ALTIVEC_BUILTIN_VEC_CMPLT }, + { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_copysign", 
ALTIVEC_BUILTIN_VEC_COPYSIGN }, { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_max", ALTIVEC_BUILTIN_VEC_MAX }, { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vmaxfp", ALTIVEC_BUILTIN_VEC_VMAXFP }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxsw", ALTIVEC_BUILTIN_VEC_VMAXSW }, @@ -8466,6 +8805,9 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sums", ALTIVEC_BUILTIN_VEC_SUMS }, { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_xor", ALTIVEC_BUILTIN_VEC_XOR }, + { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_mul", VSX_BUILTIN_VEC_MUL }, + { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_div", VSX_BUILTIN_VEC_DIV }, + { 0, CODE_FOR_divv2sf3, "__builtin_paired_divv2sf3", PAIRED_BUILTIN_DIVV2SF3 }, { 0, CODE_FOR_addv2sf3, "__builtin_paired_addv2sf3", PAIRED_BUILTIN_ADDV2SF3 }, { 0, CODE_FOR_subv2sf3, "__builtin_paired_subv2sf3", PAIRED_BUILTIN_SUBV2SF3 }, @@ -8661,6 +9003,19 @@ static const struct builtin_description_predicates bdesc_altivec_preds[] = { MASK_ALTIVEC, CODE_FOR_vector_gtu_v16qi_p, "__builtin_altivec_vcmpgtub_p", ALTIVEC_BUILTIN_VCMPGTUB_P }, + { MASK_VSX, CODE_FOR_vector_eq_v4sf_p, "__builtin_vsx_xvcmpeqsp_p", + VSX_BUILTIN_XVCMPEQSP_P }, + { MASK_VSX, CODE_FOR_vector_ge_v4sf_p, "__builtin_vsx_xvcmpgesp_p", + VSX_BUILTIN_XVCMPGESP_P }, + { MASK_VSX, CODE_FOR_vector_gt_v4sf_p, "__builtin_vsx_xvcmpgtsp_p", + VSX_BUILTIN_XVCMPGTSP_P }, + { MASK_VSX, CODE_FOR_vector_eq_v2df_p, "__builtin_vsx_xvcmpeqdp_p", + VSX_BUILTIN_XVCMPEQDP_P }, + { MASK_VSX, CODE_FOR_vector_ge_v2df_p, "__builtin_vsx_xvcmpgedp_p", + VSX_BUILTIN_XVCMPGEDP_P }, + { MASK_VSX, CODE_FOR_vector_gt_v2df_p, "__builtin_vsx_xvcmpgtdp_p", + VSX_BUILTIN_XVCMPGTDP_P }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpeq_p", ALTIVEC_BUILTIN_VCMPEQ_P }, { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpgt_p", @@ -8724,7 +9079,11 @@ static const struct builtin_description bdesc_abs[] = { 
MASK_ALTIVEC, CODE_FOR_absv16qi2, "__builtin_altivec_abs_v16qi", ALTIVEC_BUILTIN_ABS_V16QI }, { MASK_ALTIVEC, CODE_FOR_altivec_abss_v4si, "__builtin_altivec_abss_v4si", ALTIVEC_BUILTIN_ABSS_V4SI }, { MASK_ALTIVEC, CODE_FOR_altivec_abss_v8hi, "__builtin_altivec_abss_v8hi", ALTIVEC_BUILTIN_ABSS_V8HI }, - { MASK_ALTIVEC, CODE_FOR_altivec_abss_v16qi, "__builtin_altivec_abss_v16qi", ALTIVEC_BUILTIN_ABSS_V16QI } + { MASK_ALTIVEC, CODE_FOR_altivec_abss_v16qi, "__builtin_altivec_abss_v16qi", ALTIVEC_BUILTIN_ABSS_V16QI }, + { MASK_VSX, CODE_FOR_absv2df2, "__builtin_vsx_xvabsdp", VSX_BUILTIN_XVABSDP }, + { MASK_VSX, CODE_FOR_vsx_nabsv2df2, "__builtin_vsx_xvnabsdp", VSX_BUILTIN_XVNABSDP }, + { MASK_VSX, CODE_FOR_absv4sf2, "__builtin_vsx_xvabssp", VSX_BUILTIN_XVABSSP }, + { MASK_VSX, CODE_FOR_vsx_nabsv4sf2, "__builtin_vsx_xvnabssp", VSX_BUILTIN_XVNABSSP }, }; /* Simple unary operations: VECb = foo (unsigned literal) or VECb = @@ -8735,10 +9094,10 @@ static struct builtin_description bdesc_1arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vexptefp, "__builtin_altivec_vexptefp", ALTIVEC_BUILTIN_VEXPTEFP }, { MASK_ALTIVEC, CODE_FOR_altivec_vlogefp, "__builtin_altivec_vlogefp", ALTIVEC_BUILTIN_VLOGEFP }, { MASK_ALTIVEC, CODE_FOR_altivec_vrefp, "__builtin_altivec_vrefp", ALTIVEC_BUILTIN_VREFP }, - { MASK_ALTIVEC, CODE_FOR_altivec_vrfim, "__builtin_altivec_vrfim", ALTIVEC_BUILTIN_VRFIM }, + { MASK_ALTIVEC, CODE_FOR_vector_floorv4sf2, "__builtin_altivec_vrfim", ALTIVEC_BUILTIN_VRFIM }, { MASK_ALTIVEC, CODE_FOR_altivec_vrfin, "__builtin_altivec_vrfin", ALTIVEC_BUILTIN_VRFIN }, - { MASK_ALTIVEC, CODE_FOR_altivec_vrfip, "__builtin_altivec_vrfip", ALTIVEC_BUILTIN_VRFIP }, - { MASK_ALTIVEC, CODE_FOR_ftruncv4sf2, "__builtin_altivec_vrfiz", ALTIVEC_BUILTIN_VRFIZ }, + { MASK_ALTIVEC, CODE_FOR_vector_ceilv4sf2, "__builtin_altivec_vrfip", ALTIVEC_BUILTIN_VRFIP }, + { MASK_ALTIVEC, CODE_FOR_vector_btruncv4sf2, "__builtin_altivec_vrfiz", ALTIVEC_BUILTIN_VRFIZ }, { MASK_ALTIVEC, 
CODE_FOR_altivec_vrsqrtefp, "__builtin_altivec_vrsqrtefp", ALTIVEC_BUILTIN_VRSQRTEFP }, { MASK_ALTIVEC, CODE_FOR_altivec_vspltisb, "__builtin_altivec_vspltisb", ALTIVEC_BUILTIN_VSPLTISB }, { MASK_ALTIVEC, CODE_FOR_altivec_vspltish, "__builtin_altivec_vspltish", ALTIVEC_BUILTIN_VSPLTISH }, @@ -8750,6 +9109,65 @@ static struct builtin_description bdesc_1arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vupklpx, "__builtin_altivec_vupklpx", ALTIVEC_BUILTIN_VUPKLPX }, { MASK_ALTIVEC, CODE_FOR_altivec_vupklsh, "__builtin_altivec_vupklsh", ALTIVEC_BUILTIN_VUPKLSH }, + { MASK_VSX, CODE_FOR_negv2df2, "__builtin_vsx_xvnegdp", VSX_BUILTIN_XVNEGDP }, + { MASK_VSX, CODE_FOR_sqrtv2df2, "__builtin_vsx_xvsqrtdp", VSX_BUILTIN_XVSQRTDP }, + { MASK_VSX, CODE_FOR_vsx_rsqrtev2df2, "__builtin_vsx_xvrsqrtedp", VSX_BUILTIN_XVRSQRTEDP }, + { MASK_VSX, CODE_FOR_vsx_tsqrtv2df2_fe, "__builtin_vsx_xvtsqrtdp_fe", VSX_BUILTIN_XVTSQRTDP_FE }, + { MASK_VSX, CODE_FOR_vsx_tsqrtv2df2_fg, "__builtin_vsx_xvtsqrtdp_fg", VSX_BUILTIN_XVTSQRTDP_FG }, + { MASK_VSX, CODE_FOR_vsx_frev2df2, "__builtin_vsx_xvredp", VSX_BUILTIN_XVREDP }, + + { MASK_VSX, CODE_FOR_negv4sf2, "__builtin_vsx_xvnegsp", VSX_BUILTIN_XVNEGSP }, + { MASK_VSX, CODE_FOR_sqrtv4sf2, "__builtin_vsx_xvsqrtsp", VSX_BUILTIN_XVSQRTSP }, + { MASK_VSX, CODE_FOR_vsx_rsqrtev4sf2, "__builtin_vsx_xvrsqrtesp", VSX_BUILTIN_XVRSQRTESP }, + { MASK_VSX, CODE_FOR_vsx_tsqrtv4sf2_fe, "__builtin_vsx_xvtsqrtsp_fe", VSX_BUILTIN_XVTSQRTSP_FE }, + { MASK_VSX, CODE_FOR_vsx_tsqrtv4sf2_fg, "__builtin_vsx_xvtsqrtsp_fg", VSX_BUILTIN_XVTSQRTSP_FG }, + { MASK_VSX, CODE_FOR_vsx_frev4sf2, "__builtin_vsx_xvresp", VSX_BUILTIN_XVRESP }, + + { MASK_VSX, CODE_FOR_vsx_xscvdpsp, "__builtin_vsx_xscvdpsp", VSX_BUILTIN_XSCVDPSP }, + { MASK_VSX, CODE_FOR_vsx_xscvspdp, "__builtin_vsx_xscvspdp", VSX_BUILTIN_XSCVSPDP }, + { MASK_VSX, CODE_FOR_vsx_xvcvdpsp, "__builtin_vsx_xvcvdpsp", VSX_BUILTIN_XVCVDPSP }, + { MASK_VSX, CODE_FOR_vsx_xvcvspdp, "__builtin_vsx_xvcvspdp", VSX_BUILTIN_XVCVSPDP }, + {
MASK_VSX, CODE_FOR_vsx_tsqrtdf2_fe, "__builtin_vsx_xstsqrtdp_fe", VSX_BUILTIN_XSTSQRTDP_FE }, + { MASK_VSX, CODE_FOR_vsx_tsqrtdf2_fg, "__builtin_vsx_xstsqrtdp_fg", VSX_BUILTIN_XSTSQRTDP_FG }, + + { MASK_VSX, CODE_FOR_vsx_fix_truncv2dfv2di2, "__builtin_vsx_xvcvdpsxds", VSX_BUILTIN_XVCVDPSXDS }, + { MASK_VSX, CODE_FOR_vsx_fixuns_truncv2dfv2di2, "__builtin_vsx_xvcvdpuxds", VSX_BUILTIN_XVCVDPUXDS }, + { MASK_VSX, CODE_FOR_vsx_fixuns_truncv2dfv2di2, "__builtin_vsx_xvcvdpuxds_uns", VSX_BUILTIN_XVCVDPUXDS_UNS }, + { MASK_VSX, CODE_FOR_vsx_floatv2div2df2, "__builtin_vsx_xvcvsxddp", VSX_BUILTIN_XVCVSXDDP }, + { MASK_VSX, CODE_FOR_vsx_floatunsv2div2df2, "__builtin_vsx_xvcvuxddp", VSX_BUILTIN_XVCVUXDDP }, + { MASK_VSX, CODE_FOR_vsx_floatunsv2div2df2, "__builtin_vsx_xvcvuxddp_uns", VSX_BUILTIN_XVCVUXDDP_UNS }, + + { MASK_VSX, CODE_FOR_vsx_fix_truncv4sfv4si2, "__builtin_vsx_xvcvspsxws", VSX_BUILTIN_XVCVSPSXWS }, + { MASK_VSX, CODE_FOR_vsx_fixuns_truncv4sfv4si2, "__builtin_vsx_xvcvspuxws", VSX_BUILTIN_XVCVSPUXWS }, + { MASK_VSX, CODE_FOR_vsx_floatv4siv4sf2, "__builtin_vsx_xvcvsxwsp", VSX_BUILTIN_XVCVSXWSP }, + { MASK_VSX, CODE_FOR_vsx_floatunsv4siv4sf2, "__builtin_vsx_xvcvuxwsp", VSX_BUILTIN_XVCVUXWSP }, + + { MASK_VSX, CODE_FOR_vsx_xvcvdpsxws, "__builtin_vsx_xvcvdpsxws", VSX_BUILTIN_XVCVDPSXWS }, + { MASK_VSX, CODE_FOR_vsx_xvcvdpuxws, "__builtin_vsx_xvcvdpuxws", VSX_BUILTIN_XVCVDPUXWS }, + { MASK_VSX, CODE_FOR_vsx_xvcvsxwdp, "__builtin_vsx_xvcvsxwdp", VSX_BUILTIN_XVCVSXWDP }, + { MASK_VSX, CODE_FOR_vsx_xvcvuxwdp, "__builtin_vsx_xvcvuxwdp", VSX_BUILTIN_XVCVUXWDP }, + { MASK_VSX, CODE_FOR_vsx_xvrdpi, "__builtin_vsx_xvrdpi", VSX_BUILTIN_XVRDPI }, + { MASK_VSX, CODE_FOR_vsx_xvrdpic, "__builtin_vsx_xvrdpic", VSX_BUILTIN_XVRDPIC }, + { MASK_VSX, CODE_FOR_vsx_floorv2df2, "__builtin_vsx_xvrdpim", VSX_BUILTIN_XVRDPIM }, + { MASK_VSX, CODE_FOR_vsx_ceilv2df2, "__builtin_vsx_xvrdpip", VSX_BUILTIN_XVRDPIP }, + { MASK_VSX, CODE_FOR_vsx_btruncv2df2, "__builtin_vsx_xvrdpiz", 
VSX_BUILTIN_XVRDPIZ }, + + { MASK_VSX, CODE_FOR_vsx_xvcvspsxds, "__builtin_vsx_xvcvspsxds", VSX_BUILTIN_XVCVSPSXDS }, + { MASK_VSX, CODE_FOR_vsx_xvcvspuxds, "__builtin_vsx_xvcvspuxds", VSX_BUILTIN_XVCVSPUXDS }, + { MASK_VSX, CODE_FOR_vsx_xvcvsxdsp, "__builtin_vsx_xvcvsxdsp", VSX_BUILTIN_XVCVSXDSP }, + { MASK_VSX, CODE_FOR_vsx_xvcvuxdsp, "__builtin_vsx_xvcvuxdsp", VSX_BUILTIN_XVCVUXDSP }, + { MASK_VSX, CODE_FOR_vsx_xvrspi, "__builtin_vsx_xvrspi", VSX_BUILTIN_XVRSPI }, + { MASK_VSX, CODE_FOR_vsx_xvrspic, "__builtin_vsx_xvrspic", VSX_BUILTIN_XVRSPIC }, + { MASK_VSX, CODE_FOR_vsx_floorv4sf2, "__builtin_vsx_xvrspim", VSX_BUILTIN_XVRSPIM }, + { MASK_VSX, CODE_FOR_vsx_ceilv4sf2, "__builtin_vsx_xvrspip", VSX_BUILTIN_XVRSPIP }, + { MASK_VSX, CODE_FOR_vsx_btruncv4sf2, "__builtin_vsx_xvrspiz", VSX_BUILTIN_XVRSPIZ }, + + { MASK_VSX, CODE_FOR_vsx_xsrdpi, "__builtin_vsx_xsrdpi", VSX_BUILTIN_XSRDPI }, + { MASK_VSX, CODE_FOR_vsx_xsrdpic, "__builtin_vsx_xsrdpic", VSX_BUILTIN_XSRDPIC }, + { MASK_VSX, CODE_FOR_vsx_floordf2, "__builtin_vsx_xsrdpim", VSX_BUILTIN_XSRDPIM }, + { MASK_VSX, CODE_FOR_vsx_ceildf2, "__builtin_vsx_xsrdpip", VSX_BUILTIN_XSRDPIP }, + { MASK_VSX, CODE_FOR_vsx_btruncdf2, "__builtin_vsx_xsrdpiz", VSX_BUILTIN_XSRDPIZ }, + { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abs", ALTIVEC_BUILTIN_VEC_ABS }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abss", ALTIVEC_BUILTIN_VEC_ABSS }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_ceil", ALTIVEC_BUILTIN_VEC_CEIL }, @@ -8770,6 +9188,10 @@ static struct builtin_description bdesc_1arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupklsh", ALTIVEC_BUILTIN_VEC_VUPKLSH }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupklsb", ALTIVEC_BUILTIN_VEC_VUPKLSB }, + { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_nearbyint", ALTIVEC_BUILTIN_VEC_NEARBYINT }, + { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_rint", ALTIVEC_BUILTIN_VEC_RINT }, + { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_sqrt", ALTIVEC_BUILTIN_VEC_SQRT }, 
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_floatv4siv4sf2, "__builtin_vec_float_sisf", VECTOR_BUILTIN_FLOAT_V4SI_V4SF }, { MASK_ALTIVEC|MASK_VSX, CODE_FOR_unsigned_floatv4siv4sf2, "__builtin_vec_uns_float_sisf", VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF }, { MASK_ALTIVEC|MASK_VSX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_vec_fix_sfsi", VECTOR_BUILTIN_FIX_V4SF_V4SI }, @@ -9293,6 +9715,36 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) } break; + case CODE_FOR_vsx_xxpermdi_v2df: + case CODE_FOR_vsx_xxpermdi_v2di: + case CODE_FOR_vsx_xxsldwi_v16qi: + case CODE_FOR_vsx_xxsldwi_v8hi: + case CODE_FOR_vsx_xxsldwi_v4si: + case CODE_FOR_vsx_xxsldwi_v4sf: + case CODE_FOR_vsx_xxsldwi_v2di: + case CODE_FOR_vsx_xxsldwi_v2df: + /* Only allow 2-bit unsigned literals. */ + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || TREE_INT_CST_LOW (arg2) & ~0x3) + { + error ("argument 3 must be a 2-bit unsigned literal"); + return const0_rtx; + } + break; + + case CODE_FOR_vsx_set_v2df: + case CODE_FOR_vsx_set_v2di: + /* Only allow 1-bit unsigned literals. 
*/ + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || TREE_INT_CST_LOW (arg2) & ~0x1) + { + error ("argument 3 must be a 1-bit unsigned literal"); + return const0_rtx; + } + break; + default: break; } @@ -9602,8 +10054,10 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) enum machine_mode tmode, mode0; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); - if (fcode >= ALTIVEC_BUILTIN_OVERLOADED_FIRST - && fcode <= ALTIVEC_BUILTIN_OVERLOADED_LAST) + if ((fcode >= ALTIVEC_BUILTIN_OVERLOADED_FIRST + && fcode <= ALTIVEC_BUILTIN_OVERLOADED_LAST) + || (fcode >= VSX_BUILTIN_OVERLOADED_FIRST + && fcode <= VSX_BUILTIN_OVERLOADED_LAST)) { *expandedp = true; error ("unresolved overload for Altivec builtin %qF", fndecl); @@ -9711,18 +10165,24 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_VEC_INIT_V8HI: case ALTIVEC_BUILTIN_VEC_INIT_V16QI: case ALTIVEC_BUILTIN_VEC_INIT_V4SF: + case VSX_BUILTIN_VEC_INIT_V2DF: + case VSX_BUILTIN_VEC_INIT_V2DI: return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); case ALTIVEC_BUILTIN_VEC_SET_V4SI: case ALTIVEC_BUILTIN_VEC_SET_V8HI: case ALTIVEC_BUILTIN_VEC_SET_V16QI: case ALTIVEC_BUILTIN_VEC_SET_V4SF: + case VSX_BUILTIN_VEC_SET_V2DF: + case VSX_BUILTIN_VEC_SET_V2DI: return altivec_expand_vec_set_builtin (exp); case ALTIVEC_BUILTIN_VEC_EXT_V4SI: case ALTIVEC_BUILTIN_VEC_EXT_V8HI: case ALTIVEC_BUILTIN_VEC_EXT_V16QI: case ALTIVEC_BUILTIN_VEC_EXT_V4SF: + case VSX_BUILTIN_VEC_EXT_V2DF: + case VSX_BUILTIN_VEC_EXT_V2DI: return altivec_expand_vec_ext_builtin (exp, target); default: @@ -10245,6 +10705,11 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if (fcode == RS6000_BUILTIN_BSWAP_HI) return rs6000_expand_unop_builtin (CODE_FOR_bswaphi2, exp, target); + if (fcode == POWER7_BUILTIN_BPERMD) + return rs6000_expand_binop_builtin (((TARGET_64BIT) + ? 
CODE_FOR_bpermd_di + : CODE_FOR_bpermd_si), exp, target); + if (fcode == ALTIVEC_BUILTIN_MASK_FOR_LOAD || fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE) { @@ -10500,6 +10965,33 @@ rs6000_init_builtins (void) TYPE_NAME (pixel_V8HI_type_node) = tdecl; (*lang_hooks.decls.pushdecl) (tdecl); + if (TARGET_VSX) + { + tdecl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__vector double"), + V2DF_type_node); + TYPE_NAME (V2DF_type_node) = tdecl; + (*lang_hooks.decls.pushdecl) (tdecl); + + tdecl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__vector long"), + V2DI_type_node); + TYPE_NAME (V2DI_type_node) = tdecl; + (*lang_hooks.decls.pushdecl) (tdecl); + + tdecl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__vector unsigned long"), + unsigned_V2DI_type_node); + TYPE_NAME (unsigned_V2DI_type_node) = tdecl; + (*lang_hooks.decls.pushdecl) (tdecl); + + tdecl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__vector __bool long"), + bool_V2DI_type_node); + TYPE_NAME (bool_V2DI_type_node) = tdecl; + (*lang_hooks.decls.pushdecl) (tdecl); + } + if (TARGET_PAIRED_FLOAT) paired_init_builtins (); if (TARGET_SPE) @@ -10531,6 +11023,15 @@ rs6000_init_builtins (void) RS6000_BUILTIN_RECIP); } + if (TARGET_POPCNTD) + { + enum machine_mode mode = (TARGET_64BIT) ? DImode : SImode; + tree ftype = builtin_function_type (mode, mode, mode, VOIDmode, + POWER7_BUILTIN_BPERMD, + "__builtin_bpermd"); + def_builtin (MASK_POPCNTD, "__builtin_bpermd", ftype, + POWER7_BUILTIN_BPERMD); + } if (TARGET_POWERPC) { /* Don't use builtin_function_type here, as it maps HI/QI to SI.
*/ @@ -10969,6 +11470,10 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V4SF_type_node, V4SF_type_node, NULL_TREE); + tree int_ftype_int_v2df_v2df + = build_function_type_list (integer_type_node, + integer_type_node, V2DF_type_node, + V2DF_type_node, NULL_TREE); tree v4si_ftype_v4si = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_v8hi @@ -10977,6 +11482,8 @@ altivec_init_builtins (void) = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); tree v4sf_ftype_v4sf = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); + tree v2df_ftype_v2df + = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); tree void_ftype_pcvoid_int_int = build_function_type_list (void_type_node, pcvoid_type_node, integer_type_node, @@ -11079,8 +11586,10 @@ altivec_init_builtins (void) { enum machine_mode mode1; tree type; - bool is_overloaded = dp->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST - && dp->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST; + bool is_overloaded = ((dp->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST + && dp->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST) + || (dp->code >= VSX_BUILTIN_OVERLOADED_FIRST + && dp->code <= VSX_BUILTIN_OVERLOADED_LAST)); if (is_overloaded) mode1 = VOIDmode; @@ -11104,6 +11613,9 @@ altivec_init_builtins (void) case V4SFmode: type = int_ftype_int_v4sf_v4sf; break; + case V2DFmode: + type = int_ftype_int_v2df_v2df; + break; default: gcc_unreachable (); } @@ -11134,6 +11646,9 @@ altivec_init_builtins (void) case V4SFmode: type = v4sf_ftype_v4sf; break; + case V2DFmode: + type = v2df_ftype_v2df; + break; default: gcc_unreachable (); } @@ -11193,6 +11708,19 @@ altivec_init_builtins (void) def_builtin (MASK_ALTIVEC, "__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF); + if (TARGET_VSX) + { + ftype = build_function_type_list (V2DF_type_node, double_type_node, + double_type_node, NULL_TREE); + def_builtin (MASK_VSX, 
"__builtin_vec_init_v2df", ftype, + VSX_BUILTIN_VEC_INIT_V2DF); + + ftype = build_function_type_list (V2DI_type_node, intDI_type_node, + intDI_type_node, NULL_TREE); + def_builtin (MASK_VSX, "__builtin_vec_init_v2di", ftype, + VSX_BUILTIN_VEC_INIT_V2DI); + } + /* Access to the vec_set patterns. */ ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, intSI_type_node, @@ -11218,6 +11746,21 @@ altivec_init_builtins (void) def_builtin (MASK_ALTIVEC|MASK_VSX, "__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF); + if (TARGET_VSX) + { + ftype = build_function_type_list (V2DF_type_node, V2DF_type_node, + double_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_VSX, "__builtin_vec_set_v2df", ftype, + VSX_BUILTIN_VEC_SET_V2DF); + + ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, + intDI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_VSX, "__builtin_vec_set_v2di", ftype, + VSX_BUILTIN_VEC_SET_V2DI); + } + /* Access to the vec_extract patterns. 
*/ ftype = build_function_type_list (intSI_type_node, V4SI_type_node, integer_type_node, NULL_TREE); @@ -11238,6 +11781,19 @@ altivec_init_builtins (void) integer_type_node, NULL_TREE); def_builtin (MASK_ALTIVEC|MASK_VSX, "__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF); + + if (TARGET_VSX) + { + ftype = build_function_type_list (double_type_node, V2DF_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_VSX, "__builtin_vec_ext_v2df", ftype, + VSX_BUILTIN_VEC_EXT_V2DF); + + ftype = build_function_type_list (intDI_type_node, V2DI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_VSX, "__builtin_vec_ext_v2di", ftype, + VSX_BUILTIN_VEC_EXT_V2DI); + } } /* Hash function for builtin functions with up to 3 arguments and a return @@ -11333,6 +11889,14 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, case ALTIVEC_BUILTIN_VSEL_8HI_UNS: case ALTIVEC_BUILTIN_VSEL_4SI_UNS: case ALTIVEC_BUILTIN_VSEL_2DI_UNS: + case VSX_BUILTIN_VPERM_16QI_UNS: + case VSX_BUILTIN_VPERM_8HI_UNS: + case VSX_BUILTIN_VPERM_4SI_UNS: + case VSX_BUILTIN_VPERM_2DI_UNS: + case VSX_BUILTIN_XXSEL_16QI_UNS: + case VSX_BUILTIN_XXSEL_8HI_UNS: + case VSX_BUILTIN_XXSEL_4SI_UNS: + case VSX_BUILTIN_XXSEL_2DI_UNS: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; @@ -11346,6 +11910,12 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, case ALTIVEC_BUILTIN_VPERM_4SF: case ALTIVEC_BUILTIN_VPERM_2DI: case ALTIVEC_BUILTIN_VPERM_2DF: + case VSX_BUILTIN_VPERM_16QI: + case VSX_BUILTIN_VPERM_8HI: + case VSX_BUILTIN_VPERM_4SI: + case VSX_BUILTIN_VPERM_4SF: + case VSX_BUILTIN_VPERM_2DI: + case VSX_BUILTIN_VPERM_2DF: h.uns_p[3] = 1; break; @@ -11442,8 +12012,10 @@ rs6000_common_init_builtins (void) || (mask == 0 && !TARGET_PAIRED_FLOAT)) continue; - if (d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST - && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST) + if ((d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST + && d->code <= 
ALTIVEC_BUILTIN_OVERLOADED_LAST) + || (d->code >= VSX_BUILTIN_OVERLOADED_FIRST + && d->code <= VSX_BUILTIN_OVERLOADED_LAST)) { if (! (type = opaque_ftype_opaque_opaque_opaque)) type = opaque_ftype_opaque_opaque_opaque @@ -11481,8 +12053,10 @@ rs6000_common_init_builtins (void) || (mask == 0 && !TARGET_PAIRED_FLOAT)) continue; - if (d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST - && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST) + if ((d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST + && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST) + || (d->code >= VSX_BUILTIN_OVERLOADED_FIRST + && d->code <= VSX_BUILTIN_OVERLOADED_LAST)) { if (! (type = opaque_ftype_opaque_opaque)) type = opaque_ftype_opaque_opaque @@ -11537,14 +12111,15 @@ rs6000_common_init_builtins (void) enum machine_mode mode0, mode1; tree type; int mask = d->mask; - bool is_overloaded = d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST - && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST; if ((mask != 0 && (mask & target_flags) == 0) || (mask == 0 && !TARGET_PAIRED_FLOAT)) continue; - if (is_overloaded) + if ((d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST + && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST) + || (d->code >= VSX_BUILTIN_OVERLOADED_FIRST + && d->code <= VSX_BUILTIN_OVERLOADED_LAST)) { if (! (type = opaque_ftype_opaque)) type = opaque_ftype_opaque @@ -22228,18 +22803,24 @@ rs6000_handle_altivec_attribute (tree *node, mode = TYPE_MODE (type); /* Check for invalid AltiVec type qualifiers. 
*/ - if (type == long_unsigned_type_node || type == long_integer_type_node) - { - if (TARGET_64BIT) - error ("use of %<long%> in AltiVec types is invalid for 64-bit code"); - else if (rs6000_warn_altivec_long) - warning (0, "use of %<long%> in AltiVec types is deprecated; use %<int%>"); - } - else if (type == long_long_unsigned_type_node - || type == long_long_integer_type_node) - error ("use of %<long long%> in AltiVec types is invalid"); - else if (type == double_type_node) - error ("use of %<double%> in AltiVec types is invalid"); + if (!TARGET_VSX) + { + if (type == long_unsigned_type_node || type == long_integer_type_node) + { + if (TARGET_64BIT) + error ("use of %<long%> in AltiVec types is invalid for " + "64-bit code without -mvsx"); + else if (rs6000_warn_altivec_long) + warning (0, "use of %<long%> in AltiVec types is deprecated; " + "use %<int%>"); + } + else if (type == long_long_unsigned_type_node + || type == long_long_integer_type_node) + error ("use of %<long long%> in AltiVec types is invalid without " + "-mvsx"); + else if (type == double_type_node) + error ("use of %<double%> in AltiVec types is invalid without -mvsx"); + } else if (type == long_double_type_node) error ("use of %<long double%> in AltiVec types is invalid"); else if (type == boolean_type_node) @@ -22255,6 +22836,9 @@ rs6000_handle_altivec_attribute (tree *node, unsigned_p = TYPE_UNSIGNED (type); switch (mode) { + case DImode: + result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); + break; case SImode: result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); break; @@ -22265,10 +22849,12 @@ rs6000_handle_altivec_attribute (tree *node, result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); break; case SFmode: result = V4SF_type_node; break; + case DFmode: result = V2DF_type_node; break; /* If the user says 'vector int bool', we may be handed the 'bool' attribute _before_ the 'vector' attribute, and so select the proper type in the 'b' case below.
*/ case V4SImode: case V8HImode: case V16QImode: case V4SFmode: + case V2DImode: case V2DFmode: result = type; default: break; } @@ -22276,6 +22862,7 @@ rs6000_handle_altivec_attribute (tree *node, case 'b': switch (mode) { + case DImode: case V2DImode: result = bool_V2DI_type_node; break; case SImode: case V4SImode: result = bool_V4SI_type_node; break; case HImode: case V8HImode: result = bool_V8HI_type_node; break; case QImode: case V16QImode: result = bool_V16QI_type_node; @@ -22320,6 +22907,7 @@ rs6000_mangle_type (const_tree type) if (type == bool_short_type_node) return "U6__bools"; if (type == pixel_type_node) return "u7__pixel"; if (type == bool_int_type_node) return "U6__booli"; + if (type == bool_long_type_node) return "U6__booll"; /* Mangle IBM extended float long double as `g' (__float128) on powerpc*-linux where long-double-64 previously was the default. */ @@ -24557,7 +25145,7 @@ rs6000_vector_mode_supported_p (enum machine_mode mode) if (TARGET_SPE && SPE_VECTOR_MODE (mode)) return true; - else if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)) + else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)) return true; else diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 3153243..0c5e593 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1883,6 +1883,10 @@ typedef struct rs6000_args && EASY_VECTOR_15((n) >> 1) \ && ((n) & 1) == 0) +#define EASY_VECTOR_MSB(n,mode) \ + (((unsigned HOST_WIDE_INT)n) == \ + ((((unsigned HOST_WIDE_INT)GET_MODE_MASK (mode)) + 1) >> 1)) + /* Try a machine-dependent way of reloading an illegitimate address operand. If we find one, push the reload and jump to WIN. This @@ -2678,6 +2682,7 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VEC_EXT_V8HI, ALTIVEC_BUILTIN_VEC_EXT_V16QI, ALTIVEC_BUILTIN_VEC_EXT_V4SF, + ALTIVEC_BUILTIN_COPYSIGN_V4SF, /* Altivec overloaded builtins. 
*/ ALTIVEC_BUILTIN_VCMPEQ_P, @@ -2703,6 +2708,7 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VEC_CMPLE, ALTIVEC_BUILTIN_VEC_CMPLT, + ALTIVEC_BUILTIN_VEC_COPYSIGN, ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VEC_CTS, ALTIVEC_BUILTIN_VEC_CTU, @@ -2745,6 +2751,7 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VEC_MULO, + ALTIVEC_BUILTIN_VEC_NEARBYINT, ALTIVEC_BUILTIN_VEC_NMSUB, ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VEC_OR, @@ -2755,6 +2762,7 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VEC_RE, ALTIVEC_BUILTIN_VEC_RL, + ALTIVEC_BUILTIN_VEC_RINT, ALTIVEC_BUILTIN_VEC_ROUND, ALTIVEC_BUILTIN_VEC_RSQRTE, ALTIVEC_BUILTIN_VEC_SEL, @@ -2772,6 +2780,7 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VEC_SPLTB, ALTIVEC_BUILTIN_VEC_SPLTH, ALTIVEC_BUILTIN_VEC_SPLTW, + ALTIVEC_BUILTIN_VEC_SQRT, ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VEC_SRL, @@ -3228,6 +3237,8 @@ enum rs6000_builtins VSX_BUILTIN_XSRSQRTEDP, VSX_BUILTIN_XSSQRTDP, VSX_BUILTIN_XSSUBDP, + VSX_BUILTIN_CPSGNDP, + VSX_BUILTIN_CPSGNSP, VSX_BUILTIN_XSTDIVDP_FE, VSX_BUILTIN_XSTDIVDP_FG, VSX_BUILTIN_XSTSQRTDP_FE, diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index ae1ea99..9524fe8 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -101,6 +101,7 @@ (UNSPEC_RSQRT 48) (UNSPEC_TOCREL 49) (UNSPEC_MACHOPIC_OFFSET 50) + (UNSPEC_BPERM 51) ]) ;; @@ -167,6 +168,7 @@ (include "power4.md") (include "power5.md") (include "power6.md") +(include "power7.md") (include "cell.md") (include "xfpu.md") @@ -5900,9 +5902,18 @@ (match_dup 5)) (match_dup 3) (match_dup 4)))] - "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT - && !HONOR_NANS (DFmode) && !HONOR_SIGNED_ZEROS (DFmode)" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && ((TARGET_PPC_GFXOPT + && !HONOR_NANS (DFmode) + && !HONOR_SIGNED_ZEROS (DFmode)) + || VECTOR_UNIT_VSX_P (DFmode))" { + if 
(VECTOR_UNIT_VSX_P (DFmode)) + { + emit_insn (gen_vsx_copysigndf3 (operands[0], operands[1], + operands[2], CONST0_RTX (DFmode))); + DONE; + } operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (DFmode); operands[5] = CONST0_RTX (DFmode); @@ -6037,7 +6048,8 @@ (define_insn "*negdf2_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "fneg %0,%1" [(set_attr "type" "fp")]) @@ -6050,14 +6062,16 @@ (define_insn "*absdf2_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "fabs %0,%1" [(set_attr "type" "fp")]) (define_insn "*nabsdf2_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (neg:DF (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d"))))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "fnabs %0,%1" [(set_attr "type" "fp")]) @@ -6072,7 +6086,8 @@ [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (plus:DF (match_operand:DF 1 "gpc_reg_operand" "%d") (match_operand:DF 2 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "{fa|fadd} %0,%1,%2" [(set_attr "type" "fp") (set_attr "fp_type" "fp_addsub_d")]) @@ -6088,7 +6103,8 @@ [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (minus:DF (match_operand:DF 1 "gpc_reg_operand" "d") (match_operand:DF 2 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P 
(DFmode)" "{fs|fsub} %0,%1,%2" [(set_attr "type" "fp") (set_attr "fp_type" "fp_addsub_d")]) @@ -6104,7 +6120,8 @@ [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") (match_operand:DF 2 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "{fm|fmul} %0,%1,%2" [(set_attr "type" "dmul") (set_attr "fp_type" "fp_mul_d")]) @@ -6122,7 +6139,8 @@ [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (div:DF (match_operand:DF 1 "gpc_reg_operand" "d") (match_operand:DF 2 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && !TARGET_SIMPLE_FPU" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && !TARGET_SIMPLE_FPU + && !VECTOR_UNIT_VSX_P (DFmode)" "{fd|fdiv} %0,%1,%2" [(set_attr "type" "ddiv")]) @@ -6138,73 +6156,81 @@ DONE; }) -(define_insn "fred" +(define_expand "fred" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRES))] - "TARGET_POPCNTB && flag_finite_math_only" + "(TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode)) && flag_finite_math_only" + "") + +(define_insn "*fred_fpr" + [(set (match_operand:DF 0 "gpc_reg_operand" "=f") + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))] + "TARGET_POPCNTB && flag_finite_math_only && !VECTOR_UNIT_VSX_P (DFmode)" "fre %0,%1" [(set_attr "type" "fp")]) -(define_insn "" +(define_insn "*fmadddf4_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") (match_operand:DF 2 "gpc_reg_operand" "d")) (match_operand:DF 3 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT + && VECTOR_UNIT_NONE_P (DFmode)" "{fma|fmadd} %0,%1,%2,%3" [(set_attr "type" "dmul") 
(set_attr "fp_type" "fp_maddsub_d")]) -(define_insn "" +(define_insn "*fmsubdf4_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") (match_operand:DF 2 "gpc_reg_operand" "d")) (match_operand:DF 3 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT + && VECTOR_UNIT_NONE_P (DFmode)" "{fms|fmsub} %0,%1,%2,%3" [(set_attr "type" "dmul") (set_attr "fp_type" "fp_maddsub_d")]) -(define_insn "" +(define_insn "*fnmadddf4_fpr_1" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (neg:DF (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") (match_operand:DF 2 "gpc_reg_operand" "d")) (match_operand:DF 3 "gpc_reg_operand" "d"))))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && HONOR_SIGNED_ZEROS (DFmode)" + && HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)" "{fnma|fnmadd} %0,%1,%2,%3" [(set_attr "type" "dmul") (set_attr "fp_type" "fp_maddsub_d")]) -(define_insn "" +(define_insn "*fnmadddf4_fpr_2" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (minus:DF (mult:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d")) (match_operand:DF 2 "gpc_reg_operand" "d")) (match_operand:DF 3 "gpc_reg_operand" "d")))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && ! HONOR_SIGNED_ZEROS (DFmode)" + && ! 
HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)" "{fnma|fnmadd} %0,%1,%2,%3" [(set_attr "type" "dmul") (set_attr "fp_type" "fp_maddsub_d")]) -(define_insn "" +(define_insn "*fnmsubdf4_fpr_1" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (neg:DF (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") (match_operand:DF 2 "gpc_reg_operand" "d")) (match_operand:DF 3 "gpc_reg_operand" "d"))))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && HONOR_SIGNED_ZEROS (DFmode)" + && HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)" "{fnms|fnmsub} %0,%1,%2,%3" [(set_attr "type" "dmul") (set_attr "fp_type" "fp_maddsub_d")]) -(define_insn "" +(define_insn "*fnmsubdf4_fpr_2" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (minus:DF (match_operand:DF 3 "gpc_reg_operand" "d") (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") (match_operand:DF 2 "gpc_reg_operand" "d"))))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && ! HONOR_SIGNED_ZEROS (DFmode)" + && ! HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)" "{fnms|fnmsub} %0,%1,%2,%3" [(set_attr "type" "dmul") (set_attr "fp_type" "fp_maddsub_d")]) @@ -6213,7 +6239,8 @@ [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "d")))] "(TARGET_PPC_GPOPT || TARGET_POWER2) && TARGET_HARD_FLOAT && TARGET_FPRS - && TARGET_DOUBLE_FLOAT" + && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "fsqrt %0,%1" [(set_attr "type" "dsqrt")]) @@ -6308,6 +6335,12 @@ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" "") +(define_expand "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "register_operand" "") + (unsigned_fix:DI (match_operand:DF 1 "register_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_VSX" + "") + ; For each of these conversions, there is a define_expand, a define_insn ; with a '#' template, and a define_split (with C code). 
The idea is ; to allow constant folding with the template of the define_insn, @@ -6549,24 +6582,38 @@ "{fcirz|fctiwz} %0,%1" [(set_attr "type" "fp")]) -(define_insn "btruncdf2" +(define_expand "btruncdf2" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIZ))] "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "") + +(define_insn "*btruncdf2_fpr" + [(set (match_operand:DF 0 "gpc_reg_operand" "=f") + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRIZ))] + "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "friz %0,%1" [(set_attr "type" "fp")]) (define_insn "btruncsf2" [(set (match_operand:SF 0 "gpc_reg_operand" "=f") (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRIZ))] - "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT " + "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" "friz %0,%1" [(set_attr "type" "fp")]) -(define_insn "ceildf2" +(define_expand "ceildf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "") + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "")] UNSPEC_FRIP))] + "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "") + +(define_insn "*ceildf2_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIP))] - "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "frip %0,%1" [(set_attr "type" "fp")]) @@ -6577,10 +6624,17 @@ "frip %0,%1" [(set_attr "type" "fp")]) -(define_insn "floordf2" +(define_expand "floordf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "") + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "")] UNSPEC_FRIM))] + "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "") 
+ +(define_insn "*floordf2_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIM))] - "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "frim %0,%1" [(set_attr "type" "fp")]) @@ -6591,6 +6645,7 @@ "frim %0,%1" [(set_attr "type" "fp")]) +;; No VSX equivalent to frin (define_insn "rounddf2" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIN))] @@ -6605,6 +6660,12 @@ "frin %0,%1" [(set_attr "type" "fp")]) +(define_expand "ftruncdf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "") + (fix:DF (match_operand:DF 1 "gpc_reg_operand" "")))] + "VECTOR_UNIT_VSX_P (DFmode)" + "") + ; An UNSPEC is used so we don't have to support SImode in FP registers. (define_insn "stfiwx" [(set (match_operand:SI 0 "memory_operand" "=Z") @@ -6620,17 +6681,40 @@ "TARGET_HARD_FLOAT && !TARGET_FPRS" "") -(define_insn "floatdidf2" +(define_expand "floatdidf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "") + (float:DF (match_operand:DI 1 "gpc_reg_operand" "")))] + "(TARGET_POWERPC64 || TARGET_XILINX_FPU || VECTOR_UNIT_VSX_P (DFmode)) + && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" + "") + +(define_insn "*floatdidf2_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (float:DF (match_operand:DI 1 "gpc_reg_operand" "!d#r")))] - "(TARGET_POWERPC64 || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" + "(TARGET_POWERPC64 || TARGET_XILINX_FPU) + && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS + && !VECTOR_UNIT_VSX_P (DFmode)" "fcfid %0,%1" [(set_attr "type" "fp")]) -(define_insn "fix_truncdfdi2" +(define_expand "floatunsdidf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "") + (unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_VSX" + "") + +(define_expand 
"fix_truncdfdi2" + [(set (match_operand:DI 0 "gpc_reg_operand" "") + (fix:DI (match_operand:DF 1 "gpc_reg_operand" "")))] + "(TARGET_POWERPC64 || TARGET_XILINX_FPU || VECTOR_UNIT_VSX_P (DFmode)) + && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" + "") + +(define_insn "*fix_truncdfdi2_fpr" [(set (match_operand:DI 0 "gpc_reg_operand" "=!d#r") (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d")))] - "(TARGET_POWERPC64 || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" + "(TARGET_POWERPC64 || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FPRS && !VECTOR_UNIT_VSX_P (DFmode)" "fctidz %0,%1" [(set_attr "type" "fp")]) @@ -8956,8 +9040,8 @@ ;; The "??" is a kludge until we can figure out a more reasonable way ;; of handling these non-offsettable values. (define_insn "*movdf_hardfloat32" - [(set (match_operand:DF 0 "nonimmediate_operand" "=!r,??r,m,d,d,m,!r,!r,!r") - (match_operand:DF 1 "input_operand" "r,m,r,d,m,d,G,H,F"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=!r,??r,m,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,!r,!r,!r") + (match_operand:DF 1 "input_operand" "r,m,r,ws,wa,Z,Z,ws,wa,d,m,d,j,G,H,F"))] "! 
TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], DFmode) || gpc_reg_operand (operands[1], DFmode))" @@ -9036,19 +9120,30 @@ return \"\"; } case 3: - return \"fmr %0,%1\"; case 4: - return \"lfd%U1%X1 %0,%1\"; + return \"xxlor %x0,%x1,%x1\"; case 5: - return \"stfd%U0%X0 %1,%0\"; case 6: + return \"lxsd%U1x %x0,%y1\"; case 7: case 8: + return \"stxsd%U0x %x1,%y0\"; + case 9: + return \"fmr %0,%1\"; + case 10: + return \"lfd%U1%X1 %0,%1\"; + case 11: + return \"stfd%U0%X0 %1,%0\"; + case 12: + return \"xxlxor %x0,%x0,%x0\"; + case 13: + case 14: + case 15: return \"#\"; } }" - [(set_attr "type" "two,load,store,fp,fpload,fpstore,*,*,*") - (set_attr "length" "8,16,16,4,4,4,8,12,16")]) + [(set_attr "type" "two,load,store,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,*,*,*") + (set_attr "length" "8,16,16,4,4,4,4,4,4,4,4,4,4,8,12,16")]) (define_insn "*movdf_softfloat32" [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m,r,r,r") @@ -9096,19 +9191,26 @@ ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. 
(define_insn "*movdf_hardfloat64_mfpgpr" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r,r,d") - (match_operand:DF 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F,d,r"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,*c*l,!r,*h,!r,!r,!r,r,d") + (match_operand:DF 1 "input_operand" "r,Y,r,ws,?wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F,d,r"))] "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS - && TARGET_DOUBLE_FLOAT + && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], DFmode) || gpc_reg_operand (operands[1], DFmode))" "@ std%U0%X0 %1,%0 ld%U1%X1 %0,%1 mr %0,%1 + xxlor %x0,%x1,%x1 + xxlor %x0,%x1,%x1 + lxsd%U1x %x0,%y1 + lxsd%U1x %x0,%y1 + stxsd%U0x %x1,%y0 + stxsd%U0x %x1,%y0 fmr %0,%1 lfd%U1%X1 %0,%1 stfd%U0%X0 %1,%0 + xxlxor %x0,%x0,%x0 mt%0 %1 mf%1 %0 {cror 0,0,0|nop} @@ -9117,33 +9219,40 @@ # mftgpr %0,%1 mffgpr %0,%1" - [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr") - (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) + [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr") + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. 
(define_insn "*movdf_hardfloat64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r") - (match_operand:DF 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,*c*l,!r,*h,!r,!r,!r") + (match_operand:DF 1 "input_operand" "r,Y,r,ws,wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F"))] "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS - && TARGET_DOUBLE_FLOAT + && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], DFmode) || gpc_reg_operand (operands[1], DFmode))" "@ std%U0%X0 %1,%0 ld%U1%X1 %0,%1 mr %0,%1 + xxlor %x0,%x1,%x1 + xxlor %x0,%x1,%x1 + lxsd%U1x %x0,%y1 + lxsd%U1x %x0,%y1 + stxsd%U0x %x1,%y0 + stxsd%U0x %x1,%y0 fmr %0,%1 lfd%U1%X1 %0,%1 stfd%U0%X0 %1,%0 + xxlxor %x0,%x0,%x0 mt%0 %1 mf%1 %0 {cror 0,0,0|nop} # # #" - [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*") - (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16")]) + [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*") + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")]) (define_insn "*movdf_softfloat64" [(set (match_operand:DF 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h") @@ -9720,15 +9829,16 @@ (define_insn "*movti_ppc64" [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o<>,r") (match_operand:TI 1 "input_operand" "r,r,m"))] - "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], TImode) - || gpc_reg_operand (operands[1], TImode))" + "(TARGET_POWERPC64 && (gpc_reg_operand (operands[0], TImode) + || gpc_reg_operand (operands[1], TImode))) + && VECTOR_MEM_NONE_P (TImode)" "#" [(set_attr "type" "*,store,load")]) (define_split [(set (match_operand:TI 0 "gpc_reg_operand" "") (match_operand:TI 1 "const_double_operand" ""))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 && VECTOR_MEM_NONE_P (TImode)" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] " @@ -9754,7 
+9864,7 @@ (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") (match_operand:TI 1 "input_operand" ""))] - "reload_completed + "reload_completed && VECTOR_MEM_NONE_P (TImode) && gpr_or_gpr_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) @@ -12647,7 +12757,8 @@ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "d") (match_operand:DF 2 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !VECTOR_UNIT_VSX_P (DFmode)" "fcmpu %0,%1,%2" [(set_attr "type" "fpcompare")]) @@ -15320,9 +15431,19 @@ }" [(set_attr "type" "load")]) +(define_insn "bpermd_<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (unspec:P [(match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))] + "TARGET_POPCNTD" + "bpermd %0,%1,%2" + [(set_attr "type" "integer")]) + + (include "sync.md") (include "vector.md") +(include "vsx.md") (include "altivec.md") (include "spe.md") (include "dfp.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index ac61ffc..90af9dc 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -151,6 +151,10 @@ malign-branch-targets Target Undocumented Report Var(TARGET_ALIGN_BRANCH_TARGETS) Init(-1) ; Explicitly set/unset whether rs6000_align_branch_targets is set +mvectorize-builtins +Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1) +; Explicitly control whether we vectorize the builtins or not.
+ mupdate Target Report Var(TARGET_UPDATE) Init(1) Generate load/store with update instructions diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index 0b8e311..66a367a 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -53,6 +53,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rios1.md \ $(srcdir)/config/rs6000/power4.md \ $(srcdir)/config/rs6000/power5.md \ $(srcdir)/config/rs6000/power6.md \ + $(srcdir)/config/rs6000/power7.md \ $(srcdir)/config/rs6000/cell.md \ $(srcdir)/config/rs6000/xfpu.md \ $(srcdir)/config/rs6000/predicates.md \ @@ -60,6 +61,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rios1.md \ $(srcdir)/config/rs6000/darwin.md \ $(srcdir)/config/rs6000/sync.md \ $(srcdir)/config/rs6000/vector.md \ + $(srcdir)/config/rs6000/vsx.md \ $(srcdir)/config/rs6000/altivec.md \ $(srcdir)/config/rs6000/spe.md \ $(srcdir)/config/rs6000/dfp.md \ diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 1546db7..6366e4f 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -1,6 +1,7 @@ -;; Expander definitions for vector support. No instructions are in this file, -;; this file provides the generic vector expander, and the actual vector -;; instructions will be in altivec.md. +;; Expander definitions for vector support between altivec & vsx. No +;; instructions are in this file, this file provides the generic vector +;; expander, and the actual vector instructions will be in altivec.md and +;; vsx.md ;; Copyright (C) 2009 ;; Free Software Foundation, Inc. 
@@ -27,10 +28,10 @@ (define_mode_iterator VEC_I [V16QI V8HI V4SI]) ;; Vector float modes -(define_mode_iterator VEC_F [V4SF]) +(define_mode_iterator VEC_F [V4SF V2DF]) ;; Vector arithmetic modes -(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF]) +(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF]) ;; Vector modes that need alginment via permutes (define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF]) @@ -41,6 +42,9 @@ ;; Vector modes for moves. Don't do TImode here. (define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF]) +;; Vector modes for types that don't need a realignment under VSX +(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF]) + ;; Vector comparison modes (define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF]) @@ -75,7 +79,7 @@ (define_expand "mov" [(set (match_operand:VEC_M 0 "nonimmediate_operand" "") (match_operand:VEC_M 1 "any_operand" ""))] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" { if (can_create_pseudo_p ()) { @@ -89,24 +93,25 @@ } }) -;; Generic vector floating point load/store instructions. +;; Generic vector floating point load/store instructions. These will match +;; insns defined in vsx.md or altivec.md depending on the switches. 
(define_expand "vector_load_" [(set (match_operand:VEC_M 0 "vfloat_operand" "") (match_operand:VEC_M 1 "memory_operand" ""))] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "vector_store_" [(set (match_operand:VEC_M 0 "memory_operand" "") (match_operand:VEC_M 1 "vfloat_operand" ""))] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" "") ;; Splits if a GPR register was chosen for the move (define_split [(set (match_operand:VEC_L 0 "nonimmediate_operand" "") (match_operand:VEC_L 1 "input_operand" ""))] - "VECTOR_MEM_ALTIVEC_P (mode) + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && reload_completed && gpr_or_gpr_p (operands[0], operands[1])" [(pc)] @@ -149,7 +154,7 @@ (and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r") (match_operand:P 2 "reg_or_cint_operand" "rI")) (const_int -16)))] - "TARGET_ALTIVEC && (reload_in_progress || reload_completed)" + "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)" "#" "&& reload_completed" [(set (match_dup 0) @@ -167,7 +172,7 @@ [(set (match_operand:P 0 "gpc_reg_operand" "=b") (and:P (match_operand:P 1 "gpc_reg_operand" "r") (const_int -16)))] - "TARGET_ALTIVEC && (reload_in_progress || reload_completed)" + "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)" "#" "&& reload_completed" [(parallel [(set (match_dup 0) @@ -180,68 +185,131 @@ [(set (match_operand:VEC_F 0 "vfloat_operand" "") (plus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") (match_operand:VEC_F 2 "vfloat_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "sub3" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (minus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") (match_operand:VEC_F 2 "vfloat_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "mul3" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (mult:VEC_F (match_operand:VEC_F 1 
"vfloat_operand" "") (match_operand:VEC_F 2 "vfloat_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode) && TARGET_FUSED_MADD" + "(VECTOR_UNIT_VSX_P (mode) + || (VECTOR_UNIT_ALTIVEC_P (mode) && TARGET_FUSED_MADD))" " { - emit_insn (gen_altivec_mulv4sf3 (operands[0], operands[1], operands[2])); - DONE; + if (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (mode)) + { + emit_insn (gen_altivec_mulv4sf3 (operands[0], operands[1], operands[2])); + DONE; + } }") +(define_expand "div3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (div:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_VSX_P (mode)" + "") + (define_expand "neg2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (neg:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" " { - emit_insn (gen_altivec_negv4sf2 (operands[0], operands[1])); - DONE; + if (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (mode)) + { + emit_insn (gen_altivec_negv4sf2 (operands[0], operands[1])); + DONE; + } }") (define_expand "abs2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (abs:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" " { - emit_insn (gen_altivec_absv4sf2 (operands[0], operands[1])); - DONE; + if (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (mode)) + { + emit_insn (gen_altivec_absv4sf2 (operands[0], operands[1])); + DONE; + } }") (define_expand "smin3" [(set (match_operand:VEC_F 0 "register_operand" "") (smin:VEC_F (match_operand:VEC_F 1 "register_operand" "") (match_operand:VEC_F 2 "register_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "smax3" [(set (match_operand:VEC_F 0 "register_operand" "") (smax:VEC_F (match_operand:VEC_F 1 "register_operand" "") (match_operand:VEC_F 2 "register_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + 
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") +(define_expand "sqrt2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (sqrt:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_VSX_P (mode)" + "") + (define_expand "ftrunc2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (fix:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" + "") + +(define_expand "vector_ceil2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")] + UNSPEC_FRIP))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" + "") + +(define_expand "vector_floor2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")] + UNSPEC_FRIM))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") +(define_expand "vector_btrunc2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (fix:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" + "") + +(define_expand "vector_copysign3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (if_then_else:VEC_F + (ge:VEC_F (match_operand:VEC_F 2 "vfloat_operand" "") + (match_dup 3)) + (abs:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")) + (neg:VEC_F (abs:VEC_F (match_dup 1)))))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" + " +{ + if (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (mode)) + { + emit_insn (gen_altivec_copysign_v4sf3 (operands[0], operands[1], + operands[2])); + DONE; + } + + operands[3] = CONST0_RTX (mode); +}") + ;; Vector comparisons (define_expand "vcond" @@ -252,7 +320,7 @@ (match_operand:VEC_F 5 "vfloat_operand" "")]) (match_operand:VEC_F 1 "vfloat_operand" "") (match_operand:VEC_F 2 "vfloat_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" " { if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], @@ -302,21 +370,21 @@ [(set (match_operand:VEC_C 0 "vlogical_operand" "") 
(eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "") (match_operand:VEC_C 2 "vlogical_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "vector_gt" [(set (match_operand:VEC_C 0 "vlogical_operand" "") (gt:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "") (match_operand:VEC_C 2 "vlogical_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "vector_ge" [(set (match_operand:VEC_C 0 "vlogical_operand" "") (ge:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "") (match_operand:VEC_C 2 "vlogical_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "vector_gtu" @@ -342,7 +410,7 @@ (const_int 0)) (match_operand:VEC_L 2 "vlogical_operand" "") (match_operand:VEC_L 1 "vlogical_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "vector_select__uns" @@ -352,7 +420,7 @@ (const_int 0)) (match_operand:VEC_L 2 "vlogical_operand" "") (match_operand:VEC_L 1 "vlogical_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") ;; Expansions that compare vectors producing a vector result and a predicate, @@ -366,7 +434,7 @@ (set (match_operand:VEC_A 0 "vlogical_operand" "") (eq:VEC_A (match_dup 1) (match_dup 2)))])] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "vector_gt__p" @@ -378,7 +446,7 @@ (set (match_operand:VEC_A 0 "vlogical_operand" "") (gt:VEC_A (match_dup 1) (match_dup 2)))])] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "vector_ge__p" @@ -390,7 +458,7 @@ (set (match_operand:VEC_F 0 "vfloat_operand" "") (ge:VEC_F (match_dup 1) (match_dup 2)))])] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "vector_gtu__p" @@ -402,16 +470,16 @@ (set (match_operand:VEC_I 0 "vlogical_operand" "") 
(gtu:VEC_I (match_dup 1) (match_dup 2)))])] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" "") -;; AltiVec predicates. +;; AltiVec/VSX predicates. (define_expand "cr6_test_for_zero" [(set (match_operand:SI 0 "register_operand" "=r") (eq:SI (reg:CC 74) (const_int 0)))] - "TARGET_ALTIVEC" + "TARGET_ALTIVEC || TARGET_VSX" "") (define_expand "cr6_test_for_zero_reverse" @@ -419,14 +487,14 @@ (eq:SI (reg:CC 74) (const_int 0))) (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))] - "TARGET_ALTIVEC" + "TARGET_ALTIVEC || TARGET_VSX" "") (define_expand "cr6_test_for_lt" [(set (match_operand:SI 0 "register_operand" "=r") (lt:SI (reg:CC 74) (const_int 0)))] - "TARGET_ALTIVEC" + "TARGET_ALTIVEC || TARGET_VSX" "") (define_expand "cr6_test_for_lt_reverse" @@ -434,7 +502,7 @@ (lt:SI (reg:CC 74) (const_int 0))) (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))] - "TARGET_ALTIVEC" + "TARGET_ALTIVEC || TARGET_VSX" "") @@ -443,82 +511,94 @@ [(set (match_operand:VEC_L 0 "vlogical_operand" "") (xor:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") (match_operand:VEC_L 2 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "ior3" [(set (match_operand:VEC_L 0 "vlogical_operand" "") (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") (match_operand:VEC_L 2 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "and3" [(set (match_operand:VEC_L 0 "vlogical_operand" "") (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") (match_operand:VEC_L 2 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "one_cmpl2" [(set (match_operand:VEC_L 0 "vlogical_operand" "") (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "nor3" [(set (match_operand:VEC_L 0 
"vlogical_operand" "") (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") (match_operand:VEC_L 2 "vlogical_operand" ""))))] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" "") (define_expand "andc3" [(set (match_operand:VEC_L 0 "vlogical_operand" "") (and:VEC_L (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" "")) (match_operand:VEC_L 1 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" "") ;; Same size conversions (define_expand "float2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (float:VEC_F (match_operand: 1 "vint_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" " { - emit_insn (gen_altivec_vcfsx (operands[0], operands[1], const0_rtx)); - DONE; + if (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (mode)) + { + emit_insn (gen_altivec_vcfsx (operands[0], operands[1], const0_rtx)); + DONE; + } }") (define_expand "unsigned_float2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (unsigned_float:VEC_F (match_operand: 1 "vint_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" " { - emit_insn (gen_altivec_vcfux (operands[0], operands[1], const0_rtx)); - DONE; + if (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (mode)) + { + emit_insn (gen_altivec_vcfux (operands[0], operands[1], const0_rtx)); + DONE; + } }") (define_expand "fix_trunc2" [(set (match_operand: 0 "vint_operand" "") (fix: (match_operand:VEC_F 1 "vfloat_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" " { - emit_insn (gen_altivec_vctsxs (operands[0], operands[1], const0_rtx)); - DONE; + if (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (mode)) + { + emit_insn (gen_altivec_vctsxs (operands[0], operands[1], const0_rtx)); + DONE; + } }") (define_expand "fixuns_trunc2" [(set (match_operand: 0 "vint_operand" "") (unsigned_fix: (match_operand:VEC_F 1 "vfloat_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + 
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)" " { - emit_insn (gen_altivec_vctuxs (operands[0], operands[1], const0_rtx)); - DONE; + if (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (mode)) + { + emit_insn (gen_altivec_vctuxs (operands[0], operands[1], const0_rtx)); + DONE; + } }") @@ -526,7 +606,7 @@ (define_expand "vec_init" [(match_operand:VEC_E 0 "vlogical_operand" "") (match_operand:VEC_E 1 "" "")] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" { rs6000_expand_vector_init (operands[0], operands[1]); DONE; @@ -536,7 +616,7 @@ [(match_operand:VEC_E 0 "vlogical_operand" "") (match_operand: 1 "register_operand" "") (match_operand 2 "const_int_operand" "")] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" { rs6000_expand_vector_set (operands[0], operands[1], INTVAL (operands[2])); DONE; @@ -546,7 +626,7 @@ [(match_operand: 0 "register_operand" "") (match_operand:VEC_E 1 "vlogical_operand" "") (match_operand 2 "const_int_operand" "")] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" { rs6000_expand_vector_extract (operands[0], operands[1], INTVAL (operands[2])); @@ -568,7 +648,7 @@ (const_int 3) (const_int 1)])) (const_int 5)))] - "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" "") (define_expand "vec_interleave_lowv4sf" @@ -585,23 +665,171 @@ (const_int 1) (const_int 3)])) (const_int 5)))] - "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" + "") + +(define_expand "vec_interleave_highv2df" + [(set (match_operand:V2DF 0 "vfloat_operand" "") + (vec_concat:V2DF + (vec_select:DF (match_operand:V2DF 1 "vfloat_operand" "") + (parallel [(const_int 0)])) + (vec_select:DF (match_operand:V2DF 2 "vfloat_operand" "") + (parallel [(const_int 0)]))))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "") + +(define_expand "vec_interleave_lowv2df" + [(set (match_operand:V2DF 0 "vfloat_operand" "") + (vec_concat:V2DF + (vec_select:DF (match_operand:V2DF 1 "vfloat_operand" "") + 
(parallel [(const_int 1)])) + (vec_select:DF (match_operand:V2DF 2 "vfloat_operand" "") + (parallel [(const_int 1)]))))] + "VECTOR_UNIT_VSX_P (V2DFmode)" "") +;; Convert double word types to single word types +(define_expand "vec_pack_trunc_v2df" + [(match_operand:V4SF 0 "vfloat_operand" "") + (match_operand:V2DF 1 "vfloat_operand" "") + (match_operand:V2DF 2 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC" +{ + rtx r1 = gen_reg_rtx (V4SFmode); + rtx r2 = gen_reg_rtx (V4SFmode); + + emit_insn (gen_vsx_xvcvdpsp (r1, operands[1])); + emit_insn (gen_vsx_xvcvdpsp (r2, operands[2])); + emit_insn (gen_vec_extract_evenv4sf (operands[0], r1, r2)); + DONE; +}) + +(define_expand "vec_pack_sfix_trunc_v2df" + [(match_operand:V4SI 0 "vint_operand" "") + (match_operand:V2DF 1 "vfloat_operand" "") + (match_operand:V2DF 2 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC" +{ + rtx r1 = gen_reg_rtx (V4SImode); + rtx r2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_vsx_xvcvdpsxws (r1, operands[1])); + emit_insn (gen_vsx_xvcvdpsxws (r2, operands[2])); + emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2)); + DONE; +}) + +(define_expand "vec_pack_ufix_trunc_v2df" + [(match_operand:V4SI 0 "vint_operand" "") + (match_operand:V2DF 1 "vfloat_operand" "") + (match_operand:V2DF 2 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC" +{ + rtx r1 = gen_reg_rtx (V4SImode); + rtx r2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_vsx_xvcvdpuxws (r1, operands[1])); + emit_insn (gen_vsx_xvcvdpuxws (r2, operands[2])); + emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2)); + DONE; +}) + +;; Convert single word types to double word +(define_expand "vec_unpacks_hi_v4sf" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SF 1 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" +{ + rtx reg = gen_reg_rtx (V4SFmode); + + emit_insn (gen_vec_interleave_highv4sf (reg, 
operands[1], operands[1])); + emit_insn (gen_vsx_xvcvspdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacks_lo_v4sf" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SF 1 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" +{ + rtx reg = gen_reg_rtx (V4SFmode); + + emit_insn (gen_vec_interleave_lowv4sf (reg, operands[1], operands[1])); + emit_insn (gen_vsx_xvcvspdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacks_float_hi_v4si" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SI 1 "vint_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)" +{ + rtx reg = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1])); + emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacks_float_lo_v4si" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SI 1 "vint_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)" +{ + rtx reg = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1])); + emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacku_float_hi_v4si" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SI 1 "vint_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)" +{ + rtx reg = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1])); + emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacku_float_lo_v4si" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SI 1 "vint_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)" +{ + rtx reg = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1])); + emit_insn 
(gen_vsx_xvcvuxwdp (operands[0], reg)); + DONE; +}) + + ;; Align vector loads with a permute. (define_expand "vec_realign_load_" [(match_operand:VEC_K 0 "vlogical_operand" "") (match_operand:VEC_K 1 "vlogical_operand" "") (match_operand:VEC_K 2 "vlogical_operand" "") (match_operand:V16QI 3 "vlogical_operand" "")] - "VECTOR_MEM_ALTIVEC_P (mode)" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)" { emit_insn (gen_altivec_vperm_ (operands[0], operands[1], operands[2], operands[3])); DONE; }) +;; Under VSX, vectors of 4/8 byte alignments do not need to be aligned +;; since the load already handles it. +(define_expand "movmisalign" + [(set (match_operand:VEC_N 0 "vfloat_operand" "") + (match_operand:VEC_N 1 "vfloat_operand" ""))] + "VECTOR_MEM_VSX_P (mode) && TARGET_ALLOW_MOVMISALIGN" + "") + ;; Vector shift left in bits. Currently supported ony for shift ;; amounts that can be expressed as byte shifts (divisible by 8). @@ -627,9 +855,18 @@ if (bitshift_val & 0x7) FAIL; byteshift_val = bitshift_val >> 3; - shift = gen_rtx_CONST_INT (QImode, byteshift_val); - insn = gen_altivec_vsldoi_ (operands[0], operands[1], operands[1], - shift); + if (TARGET_VSX && (byteshift_val & 0x3) == 0) + { + shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2); + insn = gen_vsx_xxsldwi_ (operands[0], operands[1], operands[1], + shift); + } + else + { + shift = gen_rtx_CONST_INT (QImode, byteshift_val); + insn = gen_altivec_vsldoi_ (operands[0], operands[1], operands[1], + shift); + } emit_insn (insn); DONE; @@ -659,9 +896,18 @@ if (bitshift_val & 0x7) FAIL; byteshift_val = 16 - (bitshift_val >> 3); - shift = gen_rtx_CONST_INT (QImode, byteshift_val); - insn = gen_altivec_vsldoi_ (operands[0], operands[1], operands[1], - shift); + if (TARGET_VSX && (byteshift_val & 0x3) == 0) + { + shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2); + insn = gen_vsx_xxsldwi_ (operands[0], operands[1], operands[1], + shift); + } + else + { + shift = gen_rtx_CONST_INT (QImode, byteshift_val); + insn = 
gen_altivec_vsldoi_ (operands[0], operands[1], operands[1], + shift); + } emit_insn (insn); DONE; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md new file mode 100644 index 0000000..c6aafa6f --- /dev/null +++ b/gcc/config/rs6000/vsx.md @@ -0,0 +1,1339 @@ +;; VSX patterns. +;; Copyright (C) 2009 +;; Free Software Foundation, Inc. +;; Contributed by Michael Meissner + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Iterator for both scalar and vector floating point types supported by VSX +(define_mode_iterator VSX_B [DF V4SF V2DF]) + +;; Iterator for the 2 64-bit vector types +(define_mode_iterator VSX_D [V2DF V2DI]) + +;; Iterator for the 2 32-bit vector types +(define_mode_iterator VSX_W [V4SF V4SI]) + +;; Iterator for vector floating point types supported by VSX +(define_mode_iterator VSX_F [V4SF V2DF]) + +;; Iterator for logical types supported by VSX +(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI]) + +;; Iterator for memory move. Handle TImode specially to allow +;; it to use gprs as well as vsx registers. 
+(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF]) + +;; Iterator for types for load/store with update +(define_mode_iterator VSX_U [V16QI V8HI V4SI V2DI V4SF V2DF TI DF]) + +;; Map into the appropriate load/store name based on the type +(define_mode_attr VSm [(V16QI "vw4") + (V8HI "vw4") + (V4SI "vw4") + (V4SF "vw4") + (V2DF "vd2") + (V2DI "vd2") + (DF "d") + (TI "vw4")]) + +;; Map into the appropriate suffix based on the type +(define_mode_attr VSs [(V16QI "sp") + (V8HI "sp") + (V4SI "sp") + (V4SF "sp") + (V2DF "dp") + (V2DI "dp") + (DF "dp") + (SF "sp") + (TI "sp")]) + +;; Map the register class used +(define_mode_attr VSr [(V16QI "v") + (V8HI "v") + (V4SI "v") + (V4SF "wf") + (V2DI "wd") + (V2DF "wd") + (DF "ws") + (SF "d") + (TI "wd")]) + +;; Map the register class used for float<->int conversions +(define_mode_attr VSr2 [(V2DF "wd") + (V4SF "wf") + (DF "!f#r")]) + +(define_mode_attr VSr3 [(V2DF "wa") + (V4SF "wa") + (DF "!f#r")]) + +;; Map the register class for sp<->dp float conversions, destination +(define_mode_attr VSr4 [(SF "ws") + (DF "f") + (V2DF "wd") + (V4SF "v")]) + +;; Map the register class for sp<->dp float conversions, source +(define_mode_attr VSr5 [(SF "ws") + (DF "f") + (V2DF "v") + (V4SF "wd")]) + +;; Same size integer type for floating point data +(define_mode_attr VSi [(V4SF "v4si") + (V2DF "v2di") + (DF "di")]) + +(define_mode_attr VSI [(V4SF "V4SI") + (V2DF "V2DI") + (DF "DI")]) + +;; Word size for same size conversion +(define_mode_attr VSc [(V4SF "w") + (V2DF "d") + (DF "d")]) + +;; Bitsize for DF load with update +(define_mode_attr VSbit [(SI "32") + (DI "64")]) + +;; Map into either s or v, depending on whether this is a scalar or vector +;; operation +(define_mode_attr VSv [(V16QI "v") + (V8HI "v") + (V4SI "v") + (V4SF "v") + (V2DI "v") + (V2DF "v") + (TI "v") + (DF "s")]) + +;; Appropriate type for add ops (and other simple FP ops) +(define_mode_attr VStype_simple [(V2DF "vecfloat") + (V4SF "vecfloat") + (DF
"fp")]) + +(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d") + (V4SF "fp_addsub_s") + (DF "fp_addsub_d")]) + +;; Appropriate type for multiply ops +(define_mode_attr VStype_mul [(V2DF "vecfloat") + (V4SF "vecfloat") + (DF "dmul")]) + +(define_mode_attr VSfptype_mul [(V2DF "fp_mul_d") + (V4SF "fp_mul_s") + (DF "fp_mul_d")]) + +;; Appropriate type for divide ops. For now, just lump the vector divide with +;; the scalar divides +(define_mode_attr VStype_div [(V2DF "ddiv") + (V4SF "sdiv") + (DF "ddiv")]) + +(define_mode_attr VSfptype_div [(V2DF "fp_div_d") + (V4SF "fp_div_s") + (DF "fp_div_d")]) + +;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with +;; the scalar sqrt +(define_mode_attr VStype_sqrt [(V2DF "dsqrt") + (V4SF "sdiv") + (DF "ddiv")]) + +(define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d") + (V4SF "fp_sqrt_s") + (DF "fp_sqrt_d")]) + +;; Iterator and modes for sp<->dp conversions +;; Because scalar SF values are represented internally as double, use the +;; V4SF type to represent this rather than SF.
+(define_mode_iterator VSX_SPDP [DF V4SF V2DF]) + +(define_mode_attr VS_spdp_res [(DF "V4SF") + (V4SF "V2DF") + (V2DF "V4SF")]) + +(define_mode_attr VS_spdp_insn [(DF "xscvdpsp") + (V4SF "xvcvspdp") + (V2DF "xvcvdpsp")]) + +(define_mode_attr VS_spdp_type [(DF "fp") + (V4SF "vecfloat") + (V2DF "vecfloat")]) + +;; Map the scalar mode for a vector type +(define_mode_attr VS_scalar [(V2DF "DF") + (V2DI "DI") + (V4SF "SF") + (V4SI "SI") + (V8HI "HI") + (V16QI "QI")]) + +;; Appropriate type for load + update +(define_mode_attr VStype_load_update [(V16QI "vecload") + (V8HI "vecload") + (V4SI "vecload") + (V4SF "vecload") + (V2DI "vecload") + (V2DF "vecload") + (TI "vecload") + (DF "fpload")]) + +;; Appropriate type for store + update +(define_mode_attr VStype_store_update [(V16QI "vecstore") + (V8HI "vecstore") + (V4SI "vecstore") + (V4SF "vecstore") + (V2DI "vecstore") + (V2DF "vecstore") + (TI "vecstore") + (DF "fpstore")]) + +;; Constants for creating unspecs +(define_constants + [(UNSPEC_VSX_CONCAT 500) + (UNSPEC_VSX_CVDPSXWS 501) + (UNSPEC_VSX_CVDPUXWS 502) + (UNSPEC_VSX_CVSPDP 503) + (UNSPEC_VSX_CVSXWDP 504) + (UNSPEC_VSX_CVUXWDP 505) + (UNSPEC_VSX_CVSXDSP 506) + (UNSPEC_VSX_CVUXDSP 507) + (UNSPEC_VSX_CVSPSXDS 508) + (UNSPEC_VSX_CVSPUXDS 509) + (UNSPEC_VSX_MADD 510) + (UNSPEC_VSX_MSUB 511) + (UNSPEC_VSX_NMADD 512) + (UNSPEC_VSX_NMSUB 513) + (UNSPEC_VSX_RSQRTE 514) + (UNSPEC_VSX_TDIV 515) + (UNSPEC_VSX_TSQRT 516) + (UNSPEC_VSX_XXPERMDI 517) + (UNSPEC_VSX_SET 518) + (UNSPEC_VSX_ROUND_I 519) + (UNSPEC_VSX_ROUND_IC 520) + (UNSPEC_VSX_SLDWI 521)]) + +;; VSX moves +(define_insn "*vsx_mov" + [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,,,?Z,?wa,?wa,*o,*r,*r,,?wa,v,wZ,v") + (match_operand:VSX_M 1 "input_operand" ",Z,,wa,Z,wa,r,o,r,j,j,W,v,wZ"))] + "VECTOR_MEM_VSX_P (mode) + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + switch (which_alternative) + { + case 0: + case 3: + return "stx%U0x %x1,%y0"; + + case 1: + case 4: + 
return "lx%U0x %x0,%y1"; + + case 2: + case 5: + return "xxlor %x0,%x1,%x1"; + + case 6: + case 7: + case 8: + return "#"; + + case 9: + case 10: + return "xxlxor %x0,%x0,%x0"; + + case 11: + return output_vec_const_move (operands); + + case 12: + return "stvx %1,%y0"; + + case 13: + return "lvx %0,%y1"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,vecstore,vecload")]) + +;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for +;; unions. However for plain data movement, slightly favor the vector loads +(define_insn "*vsx_movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,?o,?r,?r,wa,v,v,wZ") + (match_operand:TI 1 "input_operand" "wa,Z,wa,r,o,r,j,W,wZ,v"))] + "VECTOR_MEM_VSX_P (TImode) + && (register_operand (operands[0], TImode) + || register_operand (operands[1], TImode))" +{ + switch (which_alternative) + { + case 0: + return "stxvd2%U0x %x1,%y0"; + + case 1: + return "lxvd2%U0x %x0,%y1"; + + case 2: + return "xxlor %x0,%x1,%x1"; + + case 3: + case 4: + case 5: + return "#"; + + case 6: + return "xxlxor %x0,%x0,%x0"; + + case 7: + return output_vec_const_move (operands); + + case 8: + return "stvx %1,%y0"; + + case 9: + return "lvx %0,%y1"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")]) + +;; Load/store with update +;; Define insns that do load or store with update. Because VSX only has +;; reg+reg addressing, pre-decrement or pre-increment is unlikely to be +;; generated. +;; +;; In all these cases, we use operands 0 and 1 for the register being +;; incremented because those are the operands that local-alloc will +;; tie and these are the pair most likely to be tieable (and the ones +;; that will benefit the most). 
+ +(define_insn "*vsx_load_update_" + [(set (match_operand:VSX_U 3 "vsx_register_operand" "=,?wa") + (mem:VSX_U (plus:P (match_operand:P 1 "gpc_reg_operand" "0,0") + (match_operand:P 2 "gpc_reg_operand" "r,r")))) + (set (match_operand:P 0 "gpc_reg_operand" "=b,b") + (plus:P (match_dup 1) + (match_dup 2)))] + " && TARGET_UPDATE && VECTOR_MEM_VSX_P (mode)" + "lxux %x3,%0,%2" + [(set_attr "type" "")]) + +(define_insn "*vsx_store_update_" + [(set (mem:VSX_U (plus:P (match_operand:P 1 "gpc_reg_operand" "0,0") + (match_operand:P 2 "gpc_reg_operand" "r,r"))) + (match_operand:VSX_U 3 "gpc_reg_operand" ",?wa")) + (set (match_operand:P 0 "gpc_reg_operand" "=b,b") + (plus:P (match_dup 1) + (match_dup 2)))] + " && TARGET_UPDATE && VECTOR_MEM_VSX_P (mode)" + "stxux %x3,%0,%2" + [(set_attr "type" "")]) + +;; We may need to have a variant on the pattern for use in the prologue +;; that doesn't depend on TARGET_UPDATE. + + +;; VSX scalar and vector floating point arithmetic instructions +(define_insn "*vsx_add3" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (plus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xadd %x0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "*vsx_sub3" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (minus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xsub %x0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "*vsx_mul3" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xmul %x0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "*vsx_div3" + [(set (match_operand:VSX_B 0
"vsx_register_operand" "=,?wa") + (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xdiv %x0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +;; *tdiv* instruction returning the FG flag +(define_expand "vsx_tdiv3_fg" + [(set (match_dup 3) + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "") + (match_operand:VSX_B 2 "vsx_register_operand" "")] + UNSPEC_VSX_TDIV)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (gt:SI (match_dup 3) + (const_int 0)))] + "VECTOR_UNIT_VSX_P (mode)" +{ + operands[3] = gen_reg_rtx (CCFPmode); +}) + +;; *tdiv* instruction returning the FE flag +(define_expand "vsx_tdiv3_fe" + [(set (match_dup 3) + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "") + (match_operand:VSX_B 2 "vsx_register_operand" "")] + UNSPEC_VSX_TDIV)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (eq:SI (match_dup 3) + (const_int 0)))] + "VECTOR_UNIT_VSX_P (mode)" +{ + operands[3] = gen_reg_rtx (CCFPmode); +}) + +(define_insn "*vsx_tdiv3_internal" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x") + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" ",wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",wa")] + UNSPEC_VSX_TDIV))] + "VECTOR_UNIT_VSX_P (mode)" + "xtdiv %0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_fre2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" ",wa")] + UNSPEC_FRES))] + "VECTOR_UNIT_VSX_P (mode)" + "xre %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "*vsx_neg2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (neg:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xneg %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "*vsx_abs2" + [(set 
(match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xabs %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_nabs2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (neg:VSX_B + (abs:VSX_B + (match_operand:VSX_B 1 "vsx_register_operand" ",wa"))))] + "VECTOR_UNIT_VSX_P (mode)" + "xnabs %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_smax3" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (smax:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xmax %x0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "*vsx_smin3" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (smin:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xmin %x0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "*vsx_sqrt2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (sqrt:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xsqrt %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_rsqrte2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" ",wa")] + UNSPEC_VSX_RSQRTE))] + "VECTOR_UNIT_VSX_P (mode)" + "xrsqrte %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +;; *tsqrt* returning the fg flag +(define_expand "vsx_tsqrt2_fg" + [(set (match_dup 3) + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")] + UNSPEC_VSX_TSQRT)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (gt:SI (match_dup 3) + (const_int 0)))] + "VECTOR_UNIT_VSX_P (mode)" +{ + operands[3] = 
gen_reg_rtx (CCFPmode); +}) + +;; *tsqrt* returning the fe flag +(define_expand "vsx_tsqrt2_fe" + [(set (match_dup 3) + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")] + UNSPEC_VSX_TSQRT)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (eq:SI (match_dup 3) + (const_int 0)))] + "VECTOR_UNIT_VSX_P (mode)" +{ + operands[3] = gen_reg_rtx (CCFPmode); +}) + +(define_insn "*vsx_tsqrt2_internal" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x") + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" ",wa")] + UNSPEC_VSX_TSQRT))] + "VECTOR_UNIT_VSX_P (mode)" + "xtsqrt %0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +;; Fused vector multiply/add instructions + +;; Note we have a pattern for the multiply/add operations that uses unspec and +;; does not check -mfused-madd to allow users to use these ops when they know +;; they want the fused multiply/add. + +(define_expand "vsx_fmadd4" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "") + (plus:VSX_B + (mult:VSX_B + (match_operand:VSX_B 1 "vsx_register_operand" "") + (match_operand:VSX_B 2 "vsx_register_operand" "")) + (match_operand:VSX_B 3 "vsx_register_operand" "")))] + "VECTOR_UNIT_VSX_P (mode)" +{ + if (!TARGET_FUSED_MADD) + { + emit_insn (gen_vsx_fmadd4_2 (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } +}) + +(define_insn "*vsx_fmadd4_1" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + (plus:VSX_B + (mult:VSX_B + (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0")) + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))] + "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD" + "@ + xmadda %x0,%x1,%x2 + xmaddm %x0,%x1,%x3 + xmadda %x0,%x1,%x2 + xmaddm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_fmadd4_2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + (unspec:VSX_B [(match_operand:VSX_B 1 
"vsx_register_operand" "%,,wa,wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")] + UNSPEC_VSX_MADD))] + "VECTOR_UNIT_VSX_P (mode)" + "@ + xmadda %x0,%x1,%x2 + xmaddm %x0,%x1,%x3 + xmadda %x0,%x1,%x2 + xmaddm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_expand "vsx_fmsub4" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "") + (minus:VSX_B + (mult:VSX_B + (match_operand:VSX_B 1 "vsx_register_operand" "") + (match_operand:VSX_B 2 "vsx_register_operand" "")) + (match_operand:VSX_B 3 "vsx_register_operand" "")))] + "VECTOR_UNIT_VSX_P (mode)" +{ + if (!TARGET_FUSED_MADD) + { + emit_insn (gen_vsx_fmsub4_2 (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } +}) + +(define_insn "*vsx_fmsub4_1" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + (minus:VSX_B + (mult:VSX_B + (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0")) + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))] + "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD" + "@ + xmsuba %x0,%x1,%x2 + xmsubm %x0,%x1,%x3 + xmsuba %x0,%x1,%x2 + xmsubm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_fmsub4_2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")] + UNSPEC_VSX_MSUB))] + "VECTOR_UNIT_VSX_P (mode)" + "@ + xmsuba %x0,%x1,%x2 + xmsubm %x0,%x1,%x3 + xmsuba %x0,%x1,%x2 + xmsubm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_expand "vsx_fnmadd4" + [(match_operand:VSX_B 0 "vsx_register_operand" "") + (match_operand:VSX_B 1 "vsx_register_operand" "") + (match_operand:VSX_B 2 "vsx_register_operand" "") + (match_operand:VSX_B 3 
"vsx_register_operand" "")] + "VECTOR_UNIT_VSX_P (mode)" +{ + if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode)) + { + emit_insn (gen_vsx_fnmadd4_1 (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode)) + { + emit_insn (gen_vsx_fnmadd4_2 (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + else + { + emit_insn (gen_vsx_fnmadd4_3 (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } +}) + +(define_insn "vsx_fnmadd4_1" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + (neg:VSX_B + (plus:VSX_B + (mult:VSX_B + (match_operand:VSX_B 1 "vsx_register_operand" ",,wa,wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0")) + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa"))))] + "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD + && HONOR_SIGNED_ZEROS (DFmode)" + "@ + xnmadda %x0,%x1,%x2 + xnmaddm %x0,%x1,%x3 + xnmadda %x0,%x1,%x2 + xnmaddm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_fnmadd4_2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + (minus:VSX_B + (mult:VSX_B + (neg:VSX_B + (match_operand:VSX_B 1 "gpc_reg_operand" ",,wa,wa")) + (match_operand:VSX_B 2 "gpc_reg_operand" ",0,wa,0")) + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))] + "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD + && !HONOR_SIGNED_ZEROS (DFmode)" + "@ + xnmadda %x0,%x1,%x2 + xnmaddm %x0,%x1,%x3 + xnmadda %x0,%x1,%x2 + xnmaddm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_fnmadd4_3" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" ",,wa,wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")] + UNSPEC_VSX_NMADD))] + "VECTOR_UNIT_VSX_P (mode)" + "@ + xnmadda %x0,%x1,%x2 + xnmaddm %x0,%x1,%x3 + xnmadda 
%x0,%x1,%x2 + xnmaddm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_expand "vsx_fnmsub4" + [(match_operand:VSX_B 0 "vsx_register_operand" "") + (match_operand:VSX_B 1 "vsx_register_operand" "") + (match_operand:VSX_B 2 "vsx_register_operand" "") + (match_operand:VSX_B 3 "vsx_register_operand" "")] + "VECTOR_UNIT_VSX_P (mode)" +{ + if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode)) + { + emit_insn (gen_vsx_fnmsub4_1 (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode)) + { + emit_insn (gen_vsx_fnmsub4_2 (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + else + { + emit_insn (gen_vsx_fnmsub4_3 (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } +}) + +(define_insn "vsx_fnmsub4_1" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + (neg:VSX_B + (minus:VSX_B + (mult:VSX_B + (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0")) + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa"))))] + "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD + && HONOR_SIGNED_ZEROS (DFmode)" + "@ + xnmsuba %x0,%x1,%x2 + xnmsubm %x0,%x1,%x3 + xnmsuba %x0,%x1,%x2 + xnmsubm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_fnmsub4_2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + (minus:VSX_B + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa") + (mult:VSX_B + (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0"))))] + "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD + && !HONOR_SIGNED_ZEROS (DFmode)" + "@ + xnmsuba %x0,%x1,%x2 + xnmsubm %x0,%x1,%x3 + xnmsuba %x0,%x1,%x2 + xnmsubm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_fnmsub4_3" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") + 
(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")] + UNSPEC_VSX_NMSUB))] + "VECTOR_UNIT_VSX_P (mode)" + "@ + xnmsuba %x0,%x1,%x2 + xnmsubm %x0,%x1,%x3 + xnmsuba %x0,%x1,%x2 + xnmsubm %x0,%x1,%x3" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +;; Vector conditional expressions (no scalar version for these instructions) +(define_insn "vsx_eq" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=,?wa") + (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" ",wa") + (match_operand:VSX_F 2 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xvcmpeq %x0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_gt" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=,?wa") + (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" ",wa") + (match_operand:VSX_F 2 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xvcmpgt %x0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "*vsx_ge" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=,?wa") + (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" ",wa") + (match_operand:VSX_F 2 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xvcmpge %x0,%x1,%x2" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +;; Floating point scalar compare +(define_insn "*vsx_cmpdf_internal1" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,?y") + (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "ws,wa") + (match_operand:DF 2 "gpc_reg_operand" "ws,wa")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && VECTOR_UNIT_VSX_P (DFmode)" + "xscmpudp %0,%x1,%x2" + [(set_attr "type" "fpcompare")]) + +;; Compare vectors producing a vector result and a predicate, setting CR6 to +;; indicate a combined status +(define_insn "*vsx_eq__p" + [(set (reg:CC 74) + (unspec:CC + [(eq:CC 
(match_operand:VSX_F 1 "vsx_register_operand" ",?wa") + (match_operand:VSX_F 2 "vsx_register_operand" ",?wa"))] + UNSPEC_PREDICATE)) + (set (match_operand:VSX_F 0 "vsx_register_operand" "=,?wa") + (eq:VSX_F (match_dup 1) + (match_dup 2)))] + "VECTOR_UNIT_VSX_P (mode)" + "xvcmpeq. %x0,%x1,%x2" + [(set_attr "type" "veccmp")]) + +(define_insn "*vsx_gt__p" + [(set (reg:CC 74) + (unspec:CC + [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" ",?wa") + (match_operand:VSX_F 2 "vsx_register_operand" ",?wa"))] + UNSPEC_PREDICATE)) + (set (match_operand:VSX_F 0 "vsx_register_operand" "=,?wa") + (gt:VSX_F (match_dup 1) + (match_dup 2)))] + "VECTOR_UNIT_VSX_P (mode)" + "xvcmpgt. %x0,%x1,%x2" + [(set_attr "type" "veccmp")]) + +(define_insn "*vsx_ge__p" + [(set (reg:CC 74) + (unspec:CC + [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" ",?wa") + (match_operand:VSX_F 2 "vsx_register_operand" ",?wa"))] + UNSPEC_PREDICATE)) + (set (match_operand:VSX_F 0 "vsx_register_operand" "=,?wa") + (ge:VSX_F (match_dup 1) + (match_dup 2)))] + "VECTOR_UNIT_VSX_P (mode)" + "xvcmpge. 
%x0,%x1,%x2" + [(set_attr "type" "veccmp")]) + +;; Vector select +(define_insn "*vsx_xxsel" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=,?wa") + (if_then_else:VSX_L + (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" ",wa") + (const_int 0)) + (match_operand:VSX_L 2 "vsx_register_operand" ",wa") + (match_operand:VSX_L 3 "vsx_register_operand" ",wa")))] + "VECTOR_MEM_VSX_P (mode)" + "xxsel %x0,%x3,%x2,%x1" + [(set_attr "type" "vecperm")]) + +(define_insn "*vsx_xxsel_uns" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=,?wa") + (if_then_else:VSX_L + (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" ",wa") + (const_int 0)) + (match_operand:VSX_L 2 "vsx_register_operand" ",wa") + (match_operand:VSX_L 3 "vsx_register_operand" ",wa")))] + "VECTOR_MEM_VSX_P (mode)" + "xxsel %x0,%x3,%x2,%x1" + [(set_attr "type" "vecperm")]) + +;; Copy sign +(define_insn "vsx_copysign3" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (if_then_else:VSX_B + (ge:VSX_B (match_operand:VSX_B 2 "vsx_register_operand" ",wa") + (match_operand:VSX_B 3 "zero_constant" "j,j")) + (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa")) + (neg:VSX_B (abs:VSX_B (match_dup 1)))))] + "VECTOR_UNIT_VSX_P (mode)" + "xcpsgn %x0,%x2,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +;; For the conversions, limit the register class for the integer value to be +;; the fprs because we don't want to add the altivec registers to movdi/movsi. +;; For the unsigned tests, there isn't a generic double -> unsigned conversion +;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX. 
+(define_insn "vsx_float2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (float:VSX_B (match_operand: 1 "vsx_register_operand" ",")))] + "VECTOR_UNIT_VSX_P (mode)" + "xcvsx %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_floatuns2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (unsigned_float:VSX_B (match_operand: 1 "vsx_register_operand" ",")))] + "VECTOR_UNIT_VSX_P (mode)" + "xcvux %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_fix_trunc2" + [(set (match_operand: 0 "vsx_register_operand" "=,?") + (fix: (match_operand:VSX_B 1 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xcvsxs %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_fixuns_trunc2" + [(set (match_operand: 0 "vsx_register_operand" "=,?") + (unsigned_fix: (match_operand:VSX_B 1 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xcvuxs %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +;; Math rounding functions +(define_insn "vsx_xri" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" ",wa")] + UNSPEC_VSX_ROUND_I))] + "VECTOR_UNIT_VSX_P (mode)" + "xri %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_xric" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" ",wa")] + UNSPEC_VSX_ROUND_IC))] + "VECTOR_UNIT_VSX_P (mode)" + "xric %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_btrunc2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (fix:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa")))] + "VECTOR_UNIT_VSX_P (mode)" + "xriz %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "*vsx_b2trunc2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + 
(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" ",wa")] + UNSPEC_FRIZ))] + "VECTOR_UNIT_VSX_P (mode)" + "xriz %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_floor2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" ",wa")] + UNSPEC_FRIM))] + "VECTOR_UNIT_VSX_P (mode)" + "xrim %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + +(define_insn "vsx_ceil2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" ",wa")] + UNSPEC_FRIP))] + "VECTOR_UNIT_VSX_P (mode)" + "xrip %x0,%x1" + [(set_attr "type" "") + (set_attr "fp_type" "")]) + + +;; VSX convert to/from double vector + +;; Convert between single and double precision +;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal +;; scalar single precision instructions internally use the double format. +;; Prefer the altivec registers, since we likely will need to do a vperm +(define_insn "vsx_" + [(set (match_operand: 0 "vsx_register_operand" "=,?wa") + (unspec: [(match_operand:VSX_SPDP 1 "vsx_register_operand" ",wa")] + UNSPEC_VSX_CVSPDP))] + "VECTOR_UNIT_VSX_P (mode)" + " %x0,%x1" + [(set_attr "type" "")]) + +;; xscvspdp, represent the scalar SF type as V4SF +(define_insn "vsx_xscvspdp" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa") + (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")] + UNSPEC_VSX_CVSPDP))] + "VECTOR_UNIT_VSX_P (DFmode)" + "xscvspdp %x0,%x1" + [(set_attr "type" "fp")]) + +;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF +;; format of scalars is actually DF. 
+(define_insn "vsx_xscvdpsp_scalar" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")] + UNSPEC_VSX_CVSPDP))] + "VECTOR_UNIT_VSX_P (DFmode)" + "xscvdpsp %x0,%x1" + [(set_attr "type" "fp")]) + +;; Convert from 64-bit to 32-bit types +;; Note, favor the Altivec registers since the usual use of these instructions +;; is in vector converts and we need to use the Altivec vperm instruction. + +(define_insn "vsx_xvcvdpsxws" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") + (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVDPSXWS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpsxws %x0,%x1" + [(set_attr "type" "vecfloat")]) + +(define_insn "vsx_xvcvdpuxws" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") + (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVDPUXWS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpuxws %x0,%x1" + [(set_attr "type" "vecfloat")]) + +(define_insn "vsx_xvcvsxdsp" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa") + (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_CVSXDSP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvsxdsp %x0,%x1" + [(set_attr "type" "vecfloat")]) + +(define_insn "vsx_xvcvuxdsp" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa") + (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_CVUXDSP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvuxdsp %x0,%x1" + [(set_attr "type" "vecfloat")]) + +;; Convert from 32-bit to 64-bit types +(define_insn "vsx_xvcvsxwdp" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") + (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_CVSXWDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvsxwdp %x0,%x1" + [(set_attr "type" "vecfloat")]) + +(define_insn "vsx_xvcvuxwdp" + [(set (match_operand:V2DF 0 "vsx_register_operand"
"=wd,?wa") + (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_CVUXWDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvuxwdp %x0,%x1" + [(set_attr "type" "vecfloat")]) + +(define_insn "vsx_xvcvspsxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") + (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVSPSXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvspsxds %x0,%x1" + [(set_attr "type" "vecfloat")]) + +(define_insn "vsx_xvcvspuxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") + (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVSPUXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvspuxds %x0,%x1" + [(set_attr "type" "vecfloat")]) + +;; Logical and permute operations +(define_insn "*vsx_and3" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=,?wa") + (and:VSX_L + (match_operand:VSX_L 1 "vsx_register_operand" ",?wa") + (match_operand:VSX_L 2 "vsx_register_operand" ",?wa")))] + "VECTOR_MEM_VSX_P (mode)" + "xxland %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*vsx_ior3" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=,?wa") + (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" ",?wa") + (match_operand:VSX_L 2 "vsx_register_operand" ",?wa")))] + "VECTOR_MEM_VSX_P (mode)" + "xxlor %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*vsx_xor3" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=,?wa") + (xor:VSX_L + (match_operand:VSX_L 1 "vsx_register_operand" ",?wa") + (match_operand:VSX_L 2 "vsx_register_operand" ",?wa")))] + "VECTOR_MEM_VSX_P (mode)" + "xxlxor %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*vsx_one_cmpl2" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=,?wa") + (not:VSX_L + (match_operand:VSX_L 1 "vsx_register_operand" ",?wa")))] + "VECTOR_MEM_VSX_P (mode)" + "xxlnor %x0,%x1,%x1" + [(set_attr "type" "vecsimple")]) + +(define_insn "*vsx_nor3" + [(set 
(match_operand:VSX_L 0 "vsx_register_operand" "=,?wa") + (not:VSX_L + (ior:VSX_L + (match_operand:VSX_L 1 "vsx_register_operand" ",?wa") + (match_operand:VSX_L 2 "vsx_register_operand" ",?wa"))))] + "VECTOR_MEM_VSX_P (mode)" + "xxlnor %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*vsx_andc3" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=,?wa") + (and:VSX_L + (not:VSX_L + (match_operand:VSX_L 2 "vsx_register_operand" ",?wa")) + (match_operand:VSX_L 1 "vsx_register_operand" ",?wa")))] + "VECTOR_MEM_VSX_P (mode)" + "xxlandc %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + + +;; Permute operations + +;; Build a V2DF/V2DI vector from two scalars +(define_insn "vsx_concat_" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa") + (unspec:VSX_D + [(match_operand: 1 "vsx_register_operand" "ws,wa") + (match_operand: 2 "vsx_register_operand" "ws,wa")] + UNSPEC_VSX_CONCAT))] + "VECTOR_MEM_VSX_P (mode)" + "xxpermdi %x0,%x1,%x2,0" + [(set_attr "type" "vecperm")]) + +;; Special purpose concat using xxpermdi to glue two single precision values +;; together, relying on the fact that internally scalar floats are represented +;; as doubles. 
This is used to initialize a V4SF vector with 4 floats. +(define_insn "vsx_concat_v2sf" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") + (unspec:V2DF + [(match_operand:SF 1 "vsx_register_operand" "f,f") + (match_operand:SF 2 "vsx_register_operand" "f,f")] + UNSPEC_VSX_CONCAT))] + "VECTOR_MEM_VSX_P (V2DFmode)" + "xxpermdi %x0,%x1,%x2,0" + [(set_attr "type" "vecperm")]) + +;; Set the element of a V2DI/V2DF mode +(define_insn "vsx_set_" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa") + (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wd,wa") + (match_operand: 2 "vsx_register_operand" "ws,wa") + (match_operand:QI 3 "u5bit_cint_operand" "i,i")] + UNSPEC_VSX_SET))] + "VECTOR_MEM_VSX_P (mode)" +{ + if (INTVAL (operands[3]) == 0) + return \"xxpermdi %x0,%x1,%x2,1\"; + else if (INTVAL (operands[3]) == 1) + return \"xxpermdi %x0,%x2,%x1,0\"; + else + gcc_unreachable (); +} + [(set_attr "type" "vecperm")]) + +;; Extract a DF/DI element from V2DF/V2DI +(define_insn "vsx_extract_" + [(set (match_operand: 0 "vsx_register_operand" "=ws,d,?wa") + (vec_select: (match_operand:VSX_D 1 "vsx_register_operand" "wd,wd,wa") + (parallel + [(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))] + "VECTOR_MEM_VSX_P (mode)" +{ + gcc_assert (UINTVAL (operands[2]) <= 1); + operands[3] = GEN_INT (INTVAL (operands[2]) << 1); + return \"xxpermdi %x0,%x1,%x1,%3\"; +} + [(set_attr "type" "vecperm")]) + +;; Optimize extracting element 0 from memory +(define_insn "*vsx_extract__zero" + [(set (match_operand: 0 "vsx_register_operand" "=ws,d,?wa") + (vec_select: + (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z") + (parallel [(const_int 0)])))] + "VECTOR_MEM_VSX_P (mode) && WORDS_BIG_ENDIAN" + "lxsd%U1x %x0,%y1" + [(set_attr "type" "fpload") + (set_attr "length" "4")]) + +;; General double word oriented permute, allow the other vector types for +;; optimizing the permute instruction.
+(define_insn "vsx_xxpermdi_" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wd,?wa") + (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wd,wa") + (match_operand:VSX_L 2 "vsx_register_operand" "wd,wa") + (match_operand:QI 3 "u5bit_cint_operand" "i,i")] + UNSPEC_VSX_XXPERMDI))] + "VECTOR_MEM_VSX_P (mode)" + "xxpermdi %x0,%x1,%x2,%3" + [(set_attr "type" "vecperm")]) + +;; Variant of xxpermdi that is emitted by the vec_interleave functions +(define_insn "*vsx_xxpermdi2_" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd") + (vec_concat:VSX_D + (vec_select: + (match_operand:VSX_D 1 "vsx_register_operand" "wd") + (parallel + [(match_operand:QI 2 "u5bit_cint_operand" "i")])) + (vec_select: + (match_operand:VSX_D 3 "vsx_register_operand" "wd") + (parallel + [(match_operand:QI 4 "u5bit_cint_operand" "i")]))))] + "VECTOR_MEM_VSX_P (mode)" +{ + gcc_assert ((UINTVAL (operands[2]) <= 1) && (UINTVAL (operands[4]) <= 1)); + operands[5] = GEN_INT (((INTVAL (operands[2]) & 1) << 1) + | (INTVAL (operands[4]) & 1)); + return \"xxpermdi %x0,%x1,%x3,%5\"; +} + [(set_attr "type" "vecperm")]) + +;; V2DF/V2DI splat +(define_insn "vsx_splat_" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa") + (vec_duplicate:VSX_D + (match_operand: 1 "input_operand" "ws,f,Z,wa,wa,Z")))] + "VECTOR_MEM_VSX_P (mode)" + "@ + xxpermdi %x0,%x1,%x1,0 + xxpermdi %x0,%x1,%x1,0 + lxvdsx %x0,%y1 + xxpermdi %x0,%x1,%x1,0 + xxpermdi %x0,%x1,%x1,0 + lxvdsx %x0,%y1" + [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")]) + +;; V4SF/V4SI splat +(define_insn "vsx_xxspltw_" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa") + (vec_duplicate:VSX_W + (vec_select: + (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa") + (parallel + [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))] + "VECTOR_MEM_VSX_P (mode)" + "xxspltw %x0,%x1,%2" + [(set_attr "type" "vecperm")]) + +;; V4SF/V4SI interleave +(define_insn "vsx_xxmrghw_" + [(set
(match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa") + (vec_merge:VSX_W + (vec_select:VSX_W + (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (vec_select:VSX_W + (match_operand:VSX_W 2 "vsx_register_operand" "wf,wa") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (const_int 5)))] + "VECTOR_MEM_VSX_P (mode)" + "xxmrghw %x0,%x1,%x2" + [(set_attr "type" "vecperm")]) + +(define_insn "vsx_xxmrglw_" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa") + (vec_merge:VSX_W + (vec_select:VSX_W + (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (vec_select:VSX_W + (match_operand:VSX_W 2 "vsx_register_operand" "wf,?wa") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (const_int 5)))] + "VECTOR_MEM_VSX_P (mode)" + "xxmrglw %x0,%x1,%x2" + [(set_attr "type" "vecperm")]) + +;; Shift left double by word immediate +(define_insn "vsx_xxsldwi_" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa") + (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa") + (match_operand:VSX_L 2 "vsx_register_operand" "wa") + (match_operand:QI 3 "u5bit_cint_operand" "i")] + UNSPEC_VSX_SLDWI))] + "VECTOR_MEM_VSX_P (mode)" + "xxsldwi %x0,%x1,%x2,%3" + [(set_attr "type" "vecperm")]) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 37c5089..b545c32 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7298,7 +7298,7 @@ instructions, but allow the compiler to schedule those calls. 
* MIPS Loongson Built-in Functions:: * Other MIPS Built-in Functions:: * picoChip Built-in Functions:: -* PowerPC AltiVec Built-in Functions:: +* PowerPC AltiVec/VSX Built-in Functions:: * SPARC VIS Built-in Functions:: * SPU Built-in Functions:: @end menu @@ -9776,7 +9776,7 @@ GCC defines the preprocessor macro @code{___GCC_HAVE_BUILTIN_MIPS_CACHE} when this function is available. @end table -@node PowerPC AltiVec Built-in Functions +@node PowerPC AltiVec/VSX Built-in Functions @subsection PowerPC AltiVec Built-in Functions GCC provides an interface for the PowerPC family of processors to access @@ -9802,6 +9802,19 @@ vector bool int vector float @end smallexample +If @option{-mvsx} is used the following additional vector types are +implemented. + +@smallexample +vector unsigned long +vector signed long +vector double +@end smallexample + +The long types are only implemented for 64-bit code generation, and +the long type is only used in the floating point/integer conversion +instructions. + GCC's implementation of the high-level language interface available from C and C++ code differs from Motorola's documentation in several ways. 
@@ -10067,6 +10080,8 @@ vector signed char vec_vavgsb (vector signed char, vector signed char); vector unsigned char vec_vavgub (vector unsigned char, vector unsigned char); +vector float vec_copysign (vector float, vector float); + vector float vec_ceil (vector float); vector signed int vec_cmpb (vector float, vector float); @@ -11669,6 +11684,92 @@ int vec_any_numeric (vector float); int vec_any_out (vector float, vector float); @end smallexample +If the vector/scalar (VSX) instruction set is available, the following +additional functions are available: + +@smallexample +vector double vec_abs (vector double); +vector double vec_add (vector double, vector double); +vector double vec_and (vector double, vector double); +vector double vec_and (vector double, vector bool long); +vector double vec_and (vector bool long, vector double); +vector double vec_andc (vector double, vector double); +vector double vec_andc (vector double, vector bool long); +vector double vec_andc (vector bool long, vector double); +vector double vec_ceil (vector double); +vector bool long vec_cmpeq (vector double, vector double); +vector bool long vec_cmpge (vector double, vector double); +vector bool long vec_cmpgt (vector double, vector double); +vector bool long vec_cmple (vector double, vector double); +vector bool long vec_cmplt (vector double, vector double); +vector float vec_div (vector float, vector float); +vector double vec_div (vector double, vector double); +vector double vec_floor (vector double); +vector double vec_madd (vector double, vector double, vector double); +vector double vec_max (vector double, vector double); +vector double vec_min (vector double, vector double); +vector float vec_msub (vector float, vector float, vector float); +vector double vec_msub (vector double, vector double, vector double); +vector float vec_mul (vector float, vector float); +vector double vec_mul (vector double, vector double); +vector float vec_nearbyint (vector float); +vector double vec_nearbyint
(vector double); +vector float vec_nmadd (vector float, vector float, vector float); +vector double vec_nmadd (vector double, vector double, vector double); +vector double vec_nmsub (vector double, vector double, vector double); +vector double vec_nor (vector double, vector double); +vector double vec_or (vector double, vector double); +vector double vec_or (vector double, vector bool long); +vector double vec_or (vector bool long, vector double); +vector double vec_perm (vector double, + vector double, + vector unsigned char); +vector float vec_rint (vector float); +vector double vec_rint (vector double); +vector double vec_sel (vector double, vector double, vector bool long); +vector double vec_sel (vector double, vector double, vector unsigned long); +vector double vec_sub (vector double, vector double); +vector float vec_sqrt (vector float); +vector double vec_sqrt (vector double); +vector double vec_trunc (vector double); +vector double vec_xor (vector double, vector double); +vector double vec_xor (vector double, vector bool long); +vector double vec_xor (vector bool long, vector double); +int vec_all_eq (vector double, vector double); +int vec_all_ge (vector double, vector double); +int vec_all_gt (vector double, vector double); +int vec_all_le (vector double, vector double); +int vec_all_lt (vector double, vector double); +int vec_all_nan (vector double); +int vec_all_ne (vector double, vector double); +int vec_all_nge (vector double, vector double); +int vec_all_ngt (vector double, vector double); +int vec_all_nle (vector double, vector double); +int vec_all_nlt (vector double, vector double); +int vec_all_numeric (vector double); +int vec_any_eq (vector double, vector double); +int vec_any_ge (vector double, vector double); +int vec_any_gt (vector double, vector double); +int vec_any_le (vector double, vector double); +int vec_any_lt (vector double, vector double); +int vec_any_nan (vector double); +int vec_any_ne (vector double, vector double); +int 
vec_any_nge (vector double, vector double); +int vec_any_ngt (vector double, vector double); +int vec_any_nle (vector double, vector double); +int vec_any_nlt (vector double, vector double); +int vec_any_numeric (vector double); +@end smallexample + +GCC provides a few other builtins on PowerPC to access certain instructions: +@smallexample +float __builtin_recipdivf (float, float); +float __builtin_rsqrtf (float); +double __builtin_recipdiv (double, double); +long __builtin_bpermd (long, long); +int __builtin_bswap16 (int); +@end smallexample + @node SPARC VIS Built-in Functions @subsection SPARC VIS Built-in Functions diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c256dde..fe24b98 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -738,7 +738,8 @@ See RS/6000 and PowerPC Options. -maltivec -mno-altivec @gol -mpowerpc-gpopt -mno-powerpc-gpopt @gol -mpowerpc-gfxopt -mno-powerpc-gfxopt @gol --mmfcrf -mno-mfcrf -mpopcntb -mno-popcntb -mfprnd -mno-fprnd @gol +-mmfcrf -mno-mfcrf -mpopcntb -mno-popcntb -mpopcntd -mno-popcntd @gol +-mfprnd -mno-fprnd @gol -mcmpb -mno-cmpb -mmfpgpr -mno-mfpgpr -mhard-dfp -mno-hard-dfp @gol -mnew-mnemonics -mold-mnemonics @gol -mfull-toc -mminimal-toc -mno-fp-in-toc -mno-sum-in-toc @gol @@ -752,7 +753,7 @@ See RS/6000 and PowerPC Options.
-mstrict-align -mno-strict-align -mrelocatable @gol -mno-relocatable -mrelocatable-lib -mno-relocatable-lib @gol -mtoc -mno-toc -mlittle -mlittle-endian -mbig -mbig-endian @gol --mdynamic-no-pic -maltivec -mswdiv @gol +-mdynamic-no-pic -maltivec -mswdiv @gol -mprioritize-restricted-insns=@var{priority} @gol -msched-costly-dep=@var{dependence_type} @gol -minsert-sched-nops=@var{scheme} @gol @@ -14116,6 +14117,8 @@ These @samp{-m} options are defined for the IBM RS/6000 and PowerPC: @itemx -mno-mfcrf @itemx -mpopcntb @itemx -mno-popcntb +@itemx -mpopcntd +@itemx -mno-popcntd @itemx -mfprnd @itemx -mno-fprnd @itemx -mcmpb @@ -14140,6 +14143,8 @@ These @samp{-m} options are defined for the IBM RS/6000 and PowerPC: @opindex mno-mfcrf @opindex mpopcntb @opindex mno-popcntb +@opindex mpopcntd +@opindex mno-popcntd @opindex mfprnd @opindex mno-fprnd @opindex mcmpb @@ -14189,6 +14194,9 @@ The @option{-mpopcntb} option allows GCC to generate the popcount and double precision FP reciprocal estimate instruction implemented on the POWER5 processor and other processors that support the PowerPC V2.02 architecture. +The @option{-mpopcntd} option allows GCC to generate the popcount +instruction implemented on the POWER7 processor and other processors +that support the PowerPC V2.06 architecture. The @option{-mfprnd} option allows GCC to generate the FP round to integer instructions implemented on the POWER5+ processor and other processors that support the PowerPC V2.03 architecture. 
@@ -14267,9 +14275,9 @@ The @option{-mcpu} options automatically enable or disable the following options: @gccoptlist{-maltivec -mfprnd -mhard-float -mmfcrf -mmultiple @gol --mnew-mnemonics -mpopcntb -mpower -mpower2 -mpowerpc64 @gol +-mnew-mnemonics -mpopcntb -mpopcntd -mpower -mpower2 -mpowerpc64 @gol -mpowerpc-gpopt -mpowerpc-gfxopt -msingle-float -mdouble-float @gol --msimple-fpu -mstring -mmulhw -mdlmzb -mmfpgpr} +-msimple-fpu -mstring -mmulhw -mdlmzb -mmfpgpr -mvsx} The particular options set for any particular CPU will vary between compiler versions, depending on what setting seems to produce optimal @@ -14370,6 +14378,14 @@ instructions. This option has been deprecated. Use @option{-mspe} and @option{-mno-spe} instead. +@item -mvsx +@itemx -mno-vsx +@opindex mvsx +@opindex mno-vsx +Generate code that uses (does not use) vector/scalar (VSX) +instructions, and also enable the use of built-in functions that allow +more direct access to the VSX instruction set. + @item -mfloat-gprs=@var{yes/single/double/no} @itemx -mfloat-gprs @opindex mfloat-gprs diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index d9dca7a..0e516b0 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -1916,7 +1916,19 @@ Floating point register (containing 64-bit value) Floating point register (containing 32-bit value) @item v -Vector register +Altivec vector register + +@item wd +VSX vector register to hold vector double data + +@item wf +VSX vector register to hold vector float data + +@item ws +VSX vector register to hold scalar float data + +@item wa +Any VSX register @item h @samp{MQ}, @samp{CTR}, or @samp{LINK} register @@ -2029,6 +2041,9 @@ AND masks that can be performed by two rldic@{l, r@} instructions @item W Vector constant that does not require memory +@item j +Vector constant that is all zeros. 
+ @end table @item Intel 386---@file{config/i386/constraints.md} diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 66b436d..4466ec7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,53 @@ +2009-07-30 Michael Meissner + Pat Haugen + Revital Eres + + * testsuite/gcc.target/powerpc/altivec-32.c: New file to test + Altivec simple math function vectorization. + + * testsuite/gcc.target/powerpc/bswap-run.c: New file to test swap + builtins. + * testsuite/gcc.target/powerpc/bswap16.c: Ditto. + * testsuite/gcc.target/powerpc/bswap32.c: Ditto. + * testsuite/gcc.target/powerpc/bswap64-1.c: Ditto. + * testsuite/gcc.target/powerpc/bswap64-2.c: Ditto. + * testsuite/gcc.target/powerpc/bswap64-3.c: Ditto. + * testsuite/gcc.target/powerpc/optimize-bswapdi-2.c: Ditto. + * testsuite/gcc.target/powerpc/optimize-bswapdi-3.c: Ditto. + * testsuite/gcc.target/powerpc/optimize-bswapsi-2.c: Ditto. + + * testsuite/gcc.target/powerpc/popcount-2.c: New file to test + power7 popcntd instructions. + * testsuite/gcc.target/powerpc/popcount-3.c: Ditto. + + * testsuite/gcc.target/powerpc/pr39457.c: New VSX test. + * testsuite/gcc.target/powerpc/vsx-builtin-1.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-builtin-2.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-builtin-3.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-builtin-4.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-builtin-5.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-builtin-6.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-vector-1.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-vector-2.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-vector-3.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-vector-4.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-vector-5.c: Ditto. + * testsuite/gcc.target/powerpc/vsx-vector-6.c: Ditto. + + * testsuite/gcc.target/powerpc/altivec-6.c: Store the result of + vec_add, so the optimizer doesn't remove it. 
+ + * testsuite/gcc.dg/optimize-bswapdi-1.c: Add powerpc 64-bit to + systems that support bswap64. + + * testsuite/gcc.dg/vmx/vmx.exp: Explicitly add -mno-vsx to + prevent VSX code generation. + + * testsuite/lib/target-supports.exp (check_vsx_hw_available): New + function to test if VSX available. + (check_effective_target_powerpc_vsx_ok): Ditto. + (check_vmx_hw_available): Add explicit -mno-vsx. + 2009-07-30 Janis Johnson PR c/39902 diff --git a/gcc/testsuite/gcc.dg/optimize-bswapdi-1.c b/gcc/testsuite/gcc.dg/optimize-bswapdi-1.c index 449dc19..a6aea4a 100644 --- a/gcc/testsuite/gcc.dg/optimize-bswapdi-1.c +++ b/gcc/testsuite/gcc.dg/optimize-bswapdi-1.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target alpha*-*-* ia64*-*-* x86_64-*-* s390x-*-* } } */ +/* { dg-do compile { target alpha*-*-* ia64*-*-* x86_64-*-* s390x-*-* powerpc*-*-* rs6000-*-* } } */ /* { dg-require-effective-target stdint_types } */ /* { dg-require-effective-target lp64 } */ /* { dg-options "-O2 -fdump-tree-bswap" } */ diff --git a/gcc/testsuite/gcc.dg/vmx/vmx.exp b/gcc/testsuite/gcc.dg/vmx/vmx.exp index 8a842e1..85c88d8 100644 --- a/gcc/testsuite/gcc.dg/vmx/vmx.exp +++ b/gcc/testsuite/gcc.dg/vmx/vmx.exp @@ -31,7 +31,7 @@ if {![istarget powerpc*-*-*] # nothing but extensions. 
global DEFAULT_VMXCFLAGS if ![info exists DEFAULT_VMXCFLAGS] then { - set DEFAULT_VMXCFLAGS "-maltivec -mabi=altivec -std=gnu99" + set DEFAULT_VMXCFLAGS "-maltivec -mabi=altivec -std=gnu99 -mno-vsx" } # If the target system supports AltiVec instructions, the default action diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-32.c b/gcc/testsuite/gcc.target/powerpc/altivec-32.c new file mode 100644 index 0000000..83105f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-32.c @@ -0,0 +1,59 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O2 -ftree-vectorize -mcpu=power6 -m64 -maltivec" } */ +/* { dg-final { scan-assembler "vsel" } } */ +/* { dg-final { scan-assembler "vrfim" } } */ +/* { dg-final { scan-assembler "vrfip" } } */ +/* { dg-final { scan-assembler "vrfiz" } } */ + +#ifndef SIZE +#define SIZE 1024 +#endif + +float a[SIZE] __attribute__((__aligned__(32))); +float b[SIZE] __attribute__((__aligned__(32))); +float c[SIZE] __attribute__((__aligned__(32))); +float d[SIZE] __attribute__((__aligned__(32))); +float e[SIZE] __attribute__((__aligned__(32))); + +extern float floorf (float); +extern float ceilf (float); +extern float truncf (float); +extern float copysignf (float, float); + +void +vector_floor (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = floorf (b[i]); +} + +void +vector_ceil (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = ceilf (b[i]); +} + +void +vector_trunc (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = truncf (b[i]); +} + +void +vector_copysign (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = copysignf (b[i], c[i]); +} diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-6.c b/gcc/testsuite/gcc.target/powerpc/altivec-6.c index dc115f9..51d4116 100644 --- a/gcc/testsuite/gcc.target/powerpc/altivec-6.c +++ 
b/gcc/testsuite/gcc.target/powerpc/altivec-6.c @@ -5,7 +5,7 @@ #include /* These denote "generic" GCC vectors. */ -static int __attribute__((vector_size(16))) x, y; +static int __attribute__((vector_size(16))) x, y, z; static vector signed int i,j; static vector signed short s,t; @@ -21,7 +21,7 @@ static int int1, int2; void b() { - vec_add (x, y); + z = vec_add (x, y); /* Make sure the predicates accept correct argument types. */ diff --git a/gcc/testsuite/gcc.target/powerpc/bswap-run.c b/gcc/testsuite/gcc.target/powerpc/bswap-run.c new file mode 100644 index 0000000..484908a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bswap-run.c @@ -0,0 +1,102 @@ +/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-options "-O2 -std=gnu99" } */ + +extern void abort (void); + +static unsigned char bytes[] = { 0, 1, 2, 0x80, 0xff }; + +unsigned short b16a (unsigned short *p) { return __builtin_bswap16 (*p); } +void b16b (unsigned short *p, unsigned short a) { *p = __builtin_bswap16 (a); } +int b16c (unsigned short a) { return __builtin_bswap16 (a); } + +unsigned int b32a (unsigned int *p) { return __builtin_bswap32 (*p); } +void b32b (unsigned int *p, unsigned int a) { *p = __builtin_bswap32 (a); } +static unsigned int b32c (unsigned int a) { return __builtin_bswap32 (a); } + +unsigned long long b64a (unsigned long long *p) { return __builtin_bswap64 (*p); } +void b64b (unsigned long long *p, unsigned long long a) { *p = __builtin_bswap64 (a); } +unsigned long long b64c (unsigned long long a) { return __builtin_bswap64 (a); } + +int +main (void) +{ + unsigned i1, i2, i3, i4, i5; + unsigned b1, b2, b3, b4, b5; + unsigned short b16_inp, b16_exp, b16_var; + unsigned int b32_inp, b32_exp, b32_var; + unsigned long long b64_inp, b64_exp, b64_var; + + for (i1 = 0; i1 < sizeof (bytes); i1++) + { + b1 = bytes[i1]; + for (i2 = 0; i2 < sizeof (bytes); i2++) + { + b2 = bytes[i2]; + b16_inp = (b1 << 8) | b2; + b16_exp = (b2 << 8) | b1; + + if (b16a (&b16_inp) != b16_exp) + abort (); 
+ + b16b (&b16_var, b16_inp); + if (b16_var != b16_exp) + abort (); + + if (b16c (b16_inp) != b16_exp) + abort (); + + for (i3 = 0; i3 < sizeof (bytes); i3++) + { + b3 = bytes[i3]; + for (i4 = 0; i4 < sizeof (bytes); i4++) + { + b4 = bytes[i4]; + b32_inp = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4; + b32_exp = (b4 << 24) | (b3 << 16) | (b2 << 8) | b1; + + if (b32a (&b32_inp) != b32_exp) + abort (); + + b32b (&b32_var, b32_inp); + if (b32_var != b32_exp) + abort (); + + if (b32c (b32_inp) != b32_exp) + abort (); + + for (i5 = 0; i5 < sizeof (bytes); i5++) + { + b5 = bytes[i5]; + b64_inp = (((unsigned long long)b32_inp) << 32) | b5; + b64_exp = (((unsigned long long)b5) << 56) | b32_exp; + + if (b64a (&b64_inp) != b64_exp) + abort (); + + b64b (&b64_var, b64_inp); + if (b64_var != b64_exp) + abort (); + + if (b64c (b64_inp) != b64_exp) + abort (); + + b64_inp = (((unsigned long long)b5) << 56) | b32_inp; + b64_exp = (((unsigned long long)b32_exp) << 32) | b5; + + if (b64a (&b64_inp) != b64_exp) + abort (); + + b64b (&b64_var, b64_inp); + if (b64_var != b64_exp) + abort (); + + if (b64c (b64_inp) != b64_exp) + abort (); + } + } + } + } + } + + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/bswap16.c b/gcc/testsuite/gcc.target/powerpc/bswap16.c new file mode 100644 index 0000000..5eea4f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bswap16.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler "lhbrx" } } */ +/* { dg-final { scan-assembler "sthbrx" } } */ + +unsigned short us; +unsigned int load_bswap16 (unsigned short *p) { return __builtin_bswap16 (*p); } +void store_bswap16 (unsigned int a) { us = __builtin_bswap16 (a); } diff --git a/gcc/testsuite/gcc.target/powerpc/bswap32.c b/gcc/testsuite/gcc.target/powerpc/bswap32.c new file mode 100644 index 0000000..1b1e189 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bswap32.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target { 
powerpc*-*-* } } } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler "lwbrx" } } */ +/* { dg-final { scan-assembler "stwbrx" } } */ + +unsigned int ui; +unsigned int load_bswap32 (unsigned int *p) { return __builtin_bswap32 (*p); } +void store_bswap32 (unsigned int a) { ui = __builtin_bswap32 (a); } diff --git a/gcc/testsuite/gcc.target/powerpc/bswap64-1.c b/gcc/testsuite/gcc.target/powerpc/bswap64-1.c new file mode 100644 index 0000000..480e1cd --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bswap64-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-options "-O2 -mno-popcntd -mcpu=power5" } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-final { scan-assembler "lwbrx" } } */ +/* { dg-final { scan-assembler "stwbrx" } } */ + +unsigned long ul; +unsigned long load_bswap64 (unsigned long *p) { return __builtin_bswap64 (*p); } +void store_bswap64 (unsigned long a) { ul = __builtin_bswap64 (a); } diff --git a/gcc/testsuite/gcc.target/powerpc/bswap64-2.c b/gcc/testsuite/gcc.target/powerpc/bswap64-2.c new file mode 100644 index 0000000..6c3d8ca --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bswap64-2.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-options "-O2 -mpopcntd" } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-final { scan-assembler "ldbrx" } } */ +/* { dg-final { scan-assembler "stdbrx" } } */ + +unsigned long ul; +unsigned long load_bswap64 (unsigned long *p) { return __builtin_bswap64 (*p); } +void store_bswap64 (unsigned long a) { ul = __builtin_bswap64 (a); } diff --git a/gcc/testsuite/gcc.target/powerpc/bswap64-3.c b/gcc/testsuite/gcc.target/powerpc/bswap64-3.c new file mode 100644 index 0000000..7f1138c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bswap64-3.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-options "-O2 -mcpu=cell" } */ +/* { 
dg-require-effective-target lp64 } */ +/* { dg-require-effective-target powerpc_ppu_ok } */ +/* { dg-final { scan-assembler "ldbrx" } } */ +/* { dg-final { scan-assembler "stdbrx" } } */ + +unsigned long ul; +unsigned long load_bswap64 (unsigned long *p) { return __builtin_bswap64 (*p); } +void store_bswap64 (unsigned long a) { ul = __builtin_bswap64 (a); } diff --git a/gcc/testsuite/gcc.target/powerpc/optimize-bswapdi-2.c b/gcc/testsuite/gcc.target/powerpc/optimize-bswapdi-2.c new file mode 100644 index 0000000..7337e99 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/optimize-bswapdi-2.c @@ -0,0 +1,36 @@ +/* { dg-require-effective-target stdint_types } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mcpu=power5" } */ + +/* This is a clone of gcc-dg/optimize-bswapdi-1.c, redone to use load and stores + to test whether lwbrx/stwbrx is generated for normal power systems. */ + +#include +#define __const_swab64(x) ((uint64_t)( \ + (((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) | \ + (((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) | \ + (((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \ + (((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) << 8) | \ + (((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \ + (((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \ + (((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \ + (((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56))) + + +/* This byte swap implementation is used by the Linux kernel and the + GNU C library. 
*/ + +uint64_t +swap64_load (uint64_t *in) +{ + return __const_swab64 (*in); +} + +void +swap64_store (uint64_t *out, uint64_t in) +{ + *out = __const_swab64 (in); +} + +/* { dg-final { scan-assembler-times "lwbrx" 2 } } */ +/* { dg-final { scan-assembler-times "stwbrx" 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/optimize-bswapdi-3.c b/gcc/testsuite/gcc.target/powerpc/optimize-bswapdi-3.c new file mode 100644 index 0000000..9dcd824 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/optimize-bswapdi-3.c @@ -0,0 +1,36 @@ +/* { dg-require-effective-target stdint_types } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mcpu=power7" } */ + +/* This is a clone of gcc-dg/optimize-bswapdi-1.c, redone to use load and stores + to test whether ldbrx/stdbrx is generated for power7. */ + +#include +#define __const_swab64(x) ((uint64_t)( \ + (((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) | \ + (((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) | \ + (((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \ + (((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) << 8) | \ + (((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \ + (((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \ + (((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \ + (((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56))) + + +/* This byte swap implementation is used by the Linux kernel and the + GNU C library. 
*/ + +uint64_t +swap64_load (uint64_t *in) +{ + return __const_swab64 (*in); +} + +void +swap64_store (uint64_t *out, uint64_t in) +{ + *out = __const_swab64 (in); +} + +/* { dg-final { scan-assembler "ldbrx" } } */ +/* { dg-final { scan-assembler "stdbrx" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/optimize-bswapsi-2.c b/gcc/testsuite/gcc.target/powerpc/optimize-bswapsi-2.c new file mode 100644 index 0000000..34cc823 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/optimize-bswapsi-2.c @@ -0,0 +1,55 @@ +/* { dg-require-effective-target stdint_types } */ +/* { dg-options "-O2 -mcpu=power5" } */ + +#include + +/* This is a clone of gcc-dg/optimize-bswapsi-1.c, redone to use load and stores + to test whether lwbrx/stwbrx is generated for normal power systems. */ + +#define __const_swab32(x) ((uint32_t)( \ + (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) | \ + (((uint32_t)(x) & (uint32_t)0x0000ff00UL) << 8) | \ + (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >> 8) | \ + (((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24))) + +/* This byte swap implementation is used by the Linux kernel and the + GNU C library. */ + +uint32_t +swap32_a_load (uint32_t *in) +{ + return __const_swab32 (*in); +} + +/* The OpenSSH byte swap implementation. */ +uint32_t +swap32_b_load (uint32_t *in) +{ + uint32_t a; + + a = (*in << 16) | (*in >> 16); + a = ((a & 0x00ff00ff) << 8) | ((a & 0xff00ff00) >> 8); + + return a; +} + +void +swap32_a_store (uint32_t *out, uint32_t in) +{ + *out = __const_swab32 (in); +} + +/* The OpenSSH byte swap implementation. 
*/ +void +swap32_b_store (uint32_t *out, uint32_t in) +{ + uint32_t a; + + a = (in << 16) | (in >> 16); + a = ((a & 0x00ff00ff) << 8) | ((a & 0xff00ff00) >> 8); + + *out = a; +} + +/* { dg-final { scan-assembler-times "lwbrx" 2 } } */ +/* { dg-final { scan-assembler-times "stwbrx" 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/popcount-2.c b/gcc/testsuite/gcc.target/powerpc/popcount-2.c new file mode 100644 index 0000000..7546a3b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/popcount-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { ilp32 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-options "-O2 -mcpu=power7 -m32" } */ +/* { dg-final { scan-assembler "popcntw" } } */ + +int foo(int x) +{ + return __builtin_popcount(x); +} diff --git a/gcc/testsuite/gcc.target/powerpc/popcount-3.c b/gcc/testsuite/gcc.target/powerpc/popcount-3.c new file mode 100644 index 0000000..c803532 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/popcount-3.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-options "-O2 -mcpu=power7 -m64" } */ +/* { dg-final { scan-assembler "popcntd" } } */ + +long foo(int x) +{ + return __builtin_popcountl(x); +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr39457.c b/gcc/testsuite/gcc.target/powerpc/pr39457.c new file mode 100644 index 0000000..22057e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr39457.c @@ -0,0 +1,56 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-options "-m64 -O2 -mminimal-toc" } */ + +/* PR 39457 -- fix breakage because the compiler ran out of registers and + wanted to stash a floating point value to the LR/CTR register. 
*/ + +/* -O2 -m64 -mminimal-toc */ +typedef struct { void *s; } S; +typedef void (*T1) (void); +typedef void (*T2) (void *, void *, int, void *); +char *fn1 (const char *, ...); +void *fn2 (void); +int fn3 (char *, int); +int fn4 (const void *); +int fn5 (const void *); +long fn6 (void) __attribute__ ((__const__)); +int fn7 (void *, void *, void *); +void *fn8 (void *, long); +void *fn9 (void *, long, const char *, ...); +void *fn10 (void *); +long fn11 (void) __attribute__ ((__const__)); +long fn12 (void *, const char *, T1, T2, void *); +void *fn13 (void *); +long fn14 (void) __attribute__ ((__const__)); +extern void *v1; +extern char *v2; +extern int v3; + +void +foo (void *x, char *z) +{ + void *i1, *i2; + int y; + if (v1) + return; + v1 = fn9 (fn10 (fn2 ()), fn6 (), "x", 0., "y", 0., 0); + y = 520 - (520 - fn4 (x)) / 2; + fn9 (fn8 (v1, fn6 ()), fn6 (), "wig", fn8 (v1, fn14 ()), "x", 18.0, + "y", 16.0, "wid", 80.0, "hi", 500.0, 0); + fn9 (fn10 (v1), fn6 (), "x1", 0., "y1", 0., "x2", 80.0, "y2", + 500.0, "f", fn3 ("fff", 0x0D0DFA00), 0); + fn13 (((S *) fn8 (v1, fn6 ()))->s); + fn12 (fn8 (v1, fn11 ()), "ev", (T1) fn7, 0, fn8 (v1, fn6 ())); + fn9 (fn8 (v1, fn6 ()), fn6 (), "wig", + fn8 (v1, fn14 ()), "x", 111.0, "y", 14.0, "wid", 774.0, "hi", + 500.0, 0); + v1 = fn9 (fn10 (v1), fn6 (), "x1", 0., "y1", 0., "x2", 774.0, "y2", + 500.0, "f", fn3 ("gc", 0x0D0DFA00), 0); + fn1 (z, 0); + i1 = fn9 (fn8 (v1, fn6 ()), fn6 (), "pixbuf", x, "x", + 800 - fn5 (x) / 2, "y", y - fn4 (x), 0); + fn12 (fn8 (i1, fn11 ()), "ev", (T1) fn7, 0, "/ok/"); + fn12 (fn8 (i1, fn11 ()), "ev", (T1) fn7, 0, 0); + i2 = fn9 (fn8 (v1, fn6 ()), fn6 (), "txt", "OK", "fnt", v2, "x", + 800, "y", y - fn4 (x) + 15, "ar", 0, "f", v3, 0); +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-1.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-1.c new file mode 100644 index 0000000..42d5b60 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-1.c @@ -0,0 +1,38 @@ +/* { dg-do compile { 
target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ +/* { dg-final { scan-assembler "xvadddp" } } */ +/* { dg-final { scan-assembler "xvsubdp" } } */ +/* { dg-final { scan-assembler "xvmuldp" } } */ +/* { dg-final { scan-assembler "xvmadd" } } */ +/* { dg-final { scan-assembler "xvmsub" } } */ +/* { dg-final { scan-assembler "xvnmadd" } } */ +/* { dg-final { scan-assembler "xvnmsub" } } */ +/* { dg-final { scan-assembler "xvdivdp" } } */ +/* { dg-final { scan-assembler "xvmaxdp" } } */ +/* { dg-final { scan-assembler "xvmindp" } } */ +/* { dg-final { scan-assembler "xvsqrtdp" } } */ +/* { dg-final { scan-assembler "xvrsqrtedp" } } */ +/* { dg-final { scan-assembler "xvabsdp" } } */ +/* { dg-final { scan-assembler "xvnabsdp" } } */ +/* { dg-final { scan-assembler "xvredp" } } */ + +void use_builtins (__vector double *p, __vector double *q, __vector double *r, __vector double *s) +{ + p[0] = __builtin_vsx_xvadddp (q[0], r[0]); + p[1] = __builtin_vsx_xvsubdp (q[1], r[1]); + p[2] = __builtin_vsx_xvmuldp (q[2], r[2]); + p[3] = __builtin_vsx_xvdivdp (q[3], r[3]); + p[4] = __builtin_vsx_xvmaxdp (q[4], r[4]); + p[5] = __builtin_vsx_xvmindp (q[5], r[5]); + p[6] = __builtin_vsx_xvabsdp (q[6]); + p[7] = __builtin_vsx_xvnabsdp (q[7]); + p[8] = __builtin_vsx_xvsqrtdp (q[8]); + p[9] = __builtin_vsx_xvmadddp (q[9], r[9], s[9]); + p[10] = __builtin_vsx_xvmsubdp (q[10], r[10], s[10]); + p[11] = __builtin_vsx_xvnmadddp (q[11], r[11], s[11]); + p[12] = __builtin_vsx_xvnmsubdp (q[12], r[12], s[12]); + p[13] = __builtin_vsx_xvredp (q[13]); + p[14] = __builtin_vsx_xvrsqrtedp (q[14]); +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-2.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-2.c new file mode 100644 index 0000000..6d883dc --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-2.c @@ -0,0 +1,38 @@ +/* { dg-do compile { 
target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ +/* { dg-final { scan-assembler "xvaddsp" } } */ +/* { dg-final { scan-assembler "xvsubsp" } } */ +/* { dg-final { scan-assembler "xvmulsp" } } */ +/* { dg-final { scan-assembler "xvmadd" } } */ +/* { dg-final { scan-assembler "xvmsub" } } */ +/* { dg-final { scan-assembler "xvnmadd" } } */ +/* { dg-final { scan-assembler "xvnmsub" } } */ +/* { dg-final { scan-assembler "xvdivsp" } } */ +/* { dg-final { scan-assembler "xvmaxsp" } } */ +/* { dg-final { scan-assembler "xvminsp" } } */ +/* { dg-final { scan-assembler "xvsqrtsp" } } */ +/* { dg-final { scan-assembler "xvabssp" } } */ +/* { dg-final { scan-assembler "xvnabssp" } } */ +/* { dg-final { scan-assembler "xvresp" } } */ +/* { dg-final { scan-assembler "xvrsqrtesp" } } */ + +void use_builtins (__vector float *p, __vector float *q, __vector float *r, __vector float *s) +{ + p[0] = __builtin_vsx_xvaddsp (q[0], r[0]); + p[1] = __builtin_vsx_xvsubsp (q[1], r[1]); + p[2] = __builtin_vsx_xvmulsp (q[2], r[2]); + p[3] = __builtin_vsx_xvdivsp (q[3], r[3]); + p[4] = __builtin_vsx_xvmaxsp (q[4], r[4]); + p[5] = __builtin_vsx_xvminsp (q[5], r[5]); + p[6] = __builtin_vsx_xvabssp (q[6]); + p[7] = __builtin_vsx_xvnabssp (q[7]); + p[8] = __builtin_vsx_xvsqrtsp (q[8]); + p[9] = __builtin_vsx_xvmaddsp (q[9], r[9], s[9]); + p[10] = __builtin_vsx_xvmsubsp (q[10], r[10], s[10]); + p[11] = __builtin_vsx_xvnmaddsp (q[11], r[11], s[11]); + p[12] = __builtin_vsx_xvnmsubsp (q[12], r[12], s[12]); + p[13] = __builtin_vsx_xvresp (q[13]); + p[14] = __builtin_vsx_xvrsqrtesp (q[14]); +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c new file mode 100644 index 0000000..8450920 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c @@ -0,0 +1,212 @@ +/* { dg-do compile { target { 
powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ +/* { dg-final { scan-assembler "xxsel" } } */ +/* { dg-final { scan-assembler "vperm" } } */ +/* { dg-final { scan-assembler "xvrdpi" } } */ +/* { dg-final { scan-assembler "xvrdpic" } } */ +/* { dg-final { scan-assembler "xvrdpim" } } */ +/* { dg-final { scan-assembler "xvrdpip" } } */ +/* { dg-final { scan-assembler "xvrdpiz" } } */ +/* { dg-final { scan-assembler "xvrspi" } } */ +/* { dg-final { scan-assembler "xvrspic" } } */ +/* { dg-final { scan-assembler "xvrspim" } } */ +/* { dg-final { scan-assembler "xvrspip" } } */ +/* { dg-final { scan-assembler "xvrspiz" } } */ +/* { dg-final { scan-assembler "xsrdpi" } } */ +/* { dg-final { scan-assembler "xsrdpic" } } */ +/* { dg-final { scan-assembler "xsrdpim" } } */ +/* { dg-final { scan-assembler "xsrdpip" } } */ +/* { dg-final { scan-assembler "xsrdpiz" } } */ +/* { dg-final { scan-assembler "xsmaxdp" } } */ +/* { dg-final { scan-assembler "xsmindp" } } */ +/* { dg-final { scan-assembler "xxland" } } */ +/* { dg-final { scan-assembler "xxlandc" } } */ +/* { dg-final { scan-assembler "xxlnor" } } */ +/* { dg-final { scan-assembler "xxlor" } } */ +/* { dg-final { scan-assembler "xxlxor" } } */ +/* { dg-final { scan-assembler "xvcmpeqdp" } } */ +/* { dg-final { scan-assembler "xvcmpgtdp" } } */ +/* { dg-final { scan-assembler "xvcmpgedp" } } */ +/* { dg-final { scan-assembler "xvcmpeqsp" } } */ +/* { dg-final { scan-assembler "xvcmpgtsp" } } */ +/* { dg-final { scan-assembler "xvcmpgesp" } } */ +/* { dg-final { scan-assembler "xxsldwi" } } */ +/* { dg-final { scan-assembler-not "call" } } */ + +extern __vector int si[][4]; +extern __vector short ss[][4]; +extern __vector signed char sc[][4]; +extern __vector float f[][4]; +extern __vector unsigned int ui[][4]; +extern __vector unsigned short us[][4]; +extern __vector unsigned 
char uc[][4]; +extern __vector __bool int bi[][4]; +extern __vector __bool short bs[][4]; +extern __vector __bool char bc[][4]; +extern __vector __pixel p[][4]; +#ifdef __VSX__ +extern __vector double d[][4]; +extern __vector long sl[][4]; +extern __vector unsigned long ul[][4]; +extern __vector __bool long bl[][4]; +#endif + +int do_sel(void) +{ + int i = 0; + + si[i][0] = __builtin_vsx_xxsel_4si (si[i][1], si[i][2], si[i][3]); i++; + ss[i][0] = __builtin_vsx_xxsel_8hi (ss[i][1], ss[i][2], ss[i][3]); i++; + sc[i][0] = __builtin_vsx_xxsel_16qi (sc[i][1], sc[i][2], sc[i][3]); i++; + f[i][0] = __builtin_vsx_xxsel_4sf (f[i][1], f[i][2], f[i][3]); i++; + d[i][0] = __builtin_vsx_xxsel_2df (d[i][1], d[i][2], d[i][3]); i++; + + si[i][0] = __builtin_vsx_xxsel (si[i][1], si[i][2], bi[i][3]); i++; + ss[i][0] = __builtin_vsx_xxsel (ss[i][1], ss[i][2], bs[i][3]); i++; + sc[i][0] = __builtin_vsx_xxsel (sc[i][1], sc[i][2], bc[i][3]); i++; + f[i][0] = __builtin_vsx_xxsel (f[i][1], f[i][2], bi[i][3]); i++; + d[i][0] = __builtin_vsx_xxsel (d[i][1], d[i][2], bl[i][3]); i++; + + si[i][0] = __builtin_vsx_xxsel (si[i][1], si[i][2], ui[i][3]); i++; + ss[i][0] = __builtin_vsx_xxsel (ss[i][1], ss[i][2], us[i][3]); i++; + sc[i][0] = __builtin_vsx_xxsel (sc[i][1], sc[i][2], uc[i][3]); i++; + f[i][0] = __builtin_vsx_xxsel (f[i][1], f[i][2], ui[i][3]); i++; + d[i][0] = __builtin_vsx_xxsel (d[i][1], d[i][2], ul[i][3]); i++; + + return i; +} + +int do_perm(void) +{ + int i = 0; + + si[i][0] = __builtin_vsx_vperm_4si (si[i][1], si[i][2], uc[i][3]); i++; + ss[i][0] = __builtin_vsx_vperm_8hi (ss[i][1], ss[i][2], uc[i][3]); i++; + sc[i][0] = __builtin_vsx_vperm_16qi (sc[i][1], sc[i][2], uc[i][3]); i++; + f[i][0] = __builtin_vsx_vperm_4sf (f[i][1], f[i][2], uc[i][3]); i++; + d[i][0] = __builtin_vsx_vperm_2df (d[i][1], d[i][2], uc[i][3]); i++; + + si[i][0] = __builtin_vsx_vperm (si[i][1], si[i][2], uc[i][3]); i++; + ss[i][0] = __builtin_vsx_vperm (ss[i][1], ss[i][2], uc[i][3]); i++; + sc[i][0] = 
__builtin_vsx_vperm (sc[i][1], sc[i][2], uc[i][3]); i++; + f[i][0] = __builtin_vsx_vperm (f[i][1], f[i][2], uc[i][3]); i++; + d[i][0] = __builtin_vsx_vperm (d[i][1], d[i][2], uc[i][3]); i++; + + return i; +} + +int do_xxperm (void) +{ + int i = 0; + + d[i][0] = __builtin_vsx_xxpermdi_2df (d[i][1], d[i][2], 0); i++; + d[i][0] = __builtin_vsx_xxpermdi (d[i][1], d[i][2], 1); i++; + return i; +} + +double x, y; +void do_concat (void) +{ + d[0][0] = __builtin_vsx_concat_2df (x, y); +} + +void do_set (void) +{ + d[0][0] = __builtin_vsx_set_2df (d[0][1], x, 0); + d[1][0] = __builtin_vsx_set_2df (d[1][1], y, 1); +} + +extern double z[][4]; + +int do_math (void) +{ + int i = 0; + + d[i][0] = __builtin_vsx_xvrdpi (d[i][1]); i++; + d[i][0] = __builtin_vsx_xvrdpic (d[i][1]); i++; + d[i][0] = __builtin_vsx_xvrdpim (d[i][1]); i++; + d[i][0] = __builtin_vsx_xvrdpip (d[i][1]); i++; + d[i][0] = __builtin_vsx_xvrdpiz (d[i][1]); i++; + + f[i][0] = __builtin_vsx_xvrspi (f[i][1]); i++; + f[i][0] = __builtin_vsx_xvrspic (f[i][1]); i++; + f[i][0] = __builtin_vsx_xvrspim (f[i][1]); i++; + f[i][0] = __builtin_vsx_xvrspip (f[i][1]); i++; + f[i][0] = __builtin_vsx_xvrspiz (f[i][1]); i++; + + z[i][0] = __builtin_vsx_xsrdpi (z[i][1]); i++; + z[i][0] = __builtin_vsx_xsrdpic (z[i][1]); i++; + z[i][0] = __builtin_vsx_xsrdpim (z[i][1]); i++; + z[i][0] = __builtin_vsx_xsrdpip (z[i][1]); i++; + z[i][0] = __builtin_vsx_xsrdpiz (z[i][1]); i++; + z[i][0] = __builtin_vsx_xsmaxdp (z[i][1], z[i][0]); i++; + z[i][0] = __builtin_vsx_xsmindp (z[i][1], z[i][0]); i++; + return i; +} + +int do_cmp (void) +{ + int i = 0; + + d[i][0] = __builtin_vsx_xvcmpeqdp (d[i][1], d[i][2]); i++; + d[i][0] = __builtin_vsx_xvcmpgtdp (d[i][1], d[i][2]); i++; + d[i][0] = __builtin_vsx_xvcmpgedp (d[i][1], d[i][2]); i++; + + f[i][0] = __builtin_vsx_xvcmpeqsp (f[i][1], f[i][2]); i++; + f[i][0] = __builtin_vsx_xvcmpgtsp (f[i][1], f[i][2]); i++; + f[i][0] = __builtin_vsx_xvcmpgesp (f[i][1], f[i][2]); i++; + return i; +} + +int 
do_logical (void) +{ + int i = 0; + + si[i][0] = __builtin_vsx_xxland (si[i][1], si[i][2]); i++; + si[i][0] = __builtin_vsx_xxlandc (si[i][1], si[i][2]); i++; + si[i][0] = __builtin_vsx_xxlnor (si[i][1], si[i][2]); i++; + si[i][0] = __builtin_vsx_xxlor (si[i][1], si[i][2]); i++; + si[i][0] = __builtin_vsx_xxlxor (si[i][1], si[i][2]); i++; + + ss[i][0] = __builtin_vsx_xxland (ss[i][1], ss[i][2]); i++; + ss[i][0] = __builtin_vsx_xxlandc (ss[i][1], ss[i][2]); i++; + ss[i][0] = __builtin_vsx_xxlnor (ss[i][1], ss[i][2]); i++; + ss[i][0] = __builtin_vsx_xxlor (ss[i][1], ss[i][2]); i++; + ss[i][0] = __builtin_vsx_xxlxor (ss[i][1], ss[i][2]); i++; + + sc[i][0] = __builtin_vsx_xxland (sc[i][1], sc[i][2]); i++; + sc[i][0] = __builtin_vsx_xxlandc (sc[i][1], sc[i][2]); i++; + sc[i][0] = __builtin_vsx_xxlnor (sc[i][1], sc[i][2]); i++; + sc[i][0] = __builtin_vsx_xxlor (sc[i][1], sc[i][2]); i++; + sc[i][0] = __builtin_vsx_xxlxor (sc[i][1], sc[i][2]); i++; + + d[i][0] = __builtin_vsx_xxland (d[i][1], d[i][2]); i++; + d[i][0] = __builtin_vsx_xxlandc (d[i][1], d[i][2]); i++; + d[i][0] = __builtin_vsx_xxlnor (d[i][1], d[i][2]); i++; + d[i][0] = __builtin_vsx_xxlor (d[i][1], d[i][2]); i++; + d[i][0] = __builtin_vsx_xxlxor (d[i][1], d[i][2]); i++; + + f[i][0] = __builtin_vsx_xxland (f[i][1], f[i][2]); i++; + f[i][0] = __builtin_vsx_xxlandc (f[i][1], f[i][2]); i++; + f[i][0] = __builtin_vsx_xxlnor (f[i][1], f[i][2]); i++; + f[i][0] = __builtin_vsx_xxlor (f[i][1], f[i][2]); i++; + f[i][0] = __builtin_vsx_xxlxor (f[i][1], f[i][2]); i++; + return i; +} + +int do_xxsldwi (void) +{ + int i = 0; + + si[i][0] = __builtin_vsx_xxsldwi (si[i][1], si[i][2], 0); i++; + ss[i][0] = __builtin_vsx_xxsldwi (ss[i][1], ss[i][2], 1); i++; + sc[i][0] = __builtin_vsx_xxsldwi (sc[i][1], sc[i][2], 2); i++; + ui[i][0] = __builtin_vsx_xxsldwi (ui[i][1], ui[i][2], 3); i++; + us[i][0] = __builtin_vsx_xxsldwi (us[i][1], us[i][2], 0); i++; + uc[i][0] = __builtin_vsx_xxsldwi (uc[i][1], uc[i][2], 1); i++; + f[i][0] = 
__builtin_vsx_xxsldwi (f[i][1], f[i][2], 2); i++; + d[i][0] = __builtin_vsx_xxsldwi (d[i][1], d[i][2], 3); i++; + return i; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-4.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-4.c new file mode 100644 index 0000000..bcf4863 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-4.c @@ -0,0 +1,142 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ +/* { dg-final { scan-assembler "xvcmpeqdp." } } */ +/* { dg-final { scan-assembler "xvcmpgtdp." } } */ +/* { dg-final { scan-assembler "xvcmpgedp." } } */ +/* { dg-final { scan-assembler "xvcmpeqsp." } } */ +/* { dg-final { scan-assembler "xvcmpgtsp." } } */ +/* { dg-final { scan-assembler "xvcmpgesp." } } */ +/* { dg-final { scan-assembler "vcmpbfp." } } */ +/* { dg-final { scan-assembler "vcmpequb." } } */ +/* { dg-final { scan-assembler "vcmpequh." } } */ +/* { dg-final { scan-assembler "vcmpequw." } } */ +/* { dg-final { scan-assembler "vcmpgtub." } } */ +/* { dg-final { scan-assembler "vcmpgtuh." } } */ +/* { dg-final { scan-assembler "vcmpgtuw." } } */ +/* { dg-final { scan-assembler "vcmpgtsb." } } */ +/* { dg-final { scan-assembler "vcmpgtsh." } } */ +/* { dg-final { scan-assembler "vcmpgtsw." } } */ +/* { dg-final { scan-assembler-not "vcmpeqfp" } } */ +/* { dg-final { scan-assembler-not "vcmpgtfp" } } */ +/* { dg-final { scan-assembler-not "vcmpgefp" } } */ + +/* check that Altivec builtins generate VSX if -mvsx. 
*/ + +#include <altivec.h> + +int *v16qi_s (vector signed char *a, vector signed char *b, int *p) +{ + if (vec_all_eq (*a, *b)) + *p++ = 1; + + if (vec_all_gt (*a, *b)) + *p++ = 2; + + if (vec_all_ge (*a, *b)) + *p++ = 2; + + return p; +} + +int *v16qi_u (vector unsigned char *a, vector unsigned char *b, int *p) +{ + if (vec_all_eq (*a, *b)) + *p++ = 1; + + if (vec_all_gt (*a, *b)) + *p++ = 2; + + if (vec_all_ge (*a, *b)) + *p++ = 2; + + return p; +} + +int *v8hi_s (vector short *a, vector short *b, int *p) +{ + if (vec_all_eq (*a, *b)) + *p++ = 1; + + if (vec_all_gt (*a, *b)) + *p++ = 2; + + if (vec_all_ge (*a, *b)) + *p++ = 2; + + return p; +} + +int *v8hi_u (vector unsigned short *a, vector unsigned short *b, int *p) +{ + if (vec_all_eq (*a, *b)) + *p++ = 1; + + if (vec_all_gt (*a, *b)) + *p++ = 2; + + if (vec_all_ge (*a, *b)) + *p++ = 2; + + return p; +} + +int *v4si_s (vector int *a, vector int *b, int *p) +{ + if (vec_all_eq (*a, *b)) + *p++ = 1; + + if (vec_all_gt (*a, *b)) + *p++ = 2; + + if (vec_all_ge (*a, *b)) + *p++ = 2; + + return p; +} + +int *v4si_u (vector unsigned int *a, vector unsigned int *b, int *p) +{ + if (vec_all_eq (*a, *b)) + *p++ = 1; + + if (vec_all_gt (*a, *b)) + *p++ = 2; + + if (vec_all_ge (*a, *b)) + *p++ = 2; + + return p; +} + +int *v4sf (vector float *a, vector float *b, int *p) +{ + if (vec_all_eq (*a, *b)) + *p++ = 1; + + if (vec_all_gt (*a, *b)) + *p++ = 2; + + if (vec_all_ge (*a, *b)) + *p++ = 3; + + if (vec_all_in (*a, *b)) /* vcmpbfp.
*/ + *p++ = 4; + + return p; +} + +int *v2df (vector double *a, vector double *b, int *p) +{ + if (vec_all_eq (*a, *b)) + *p++ = 1; + + if (vec_all_gt (*a, *b)) + *p++ = 2; + + if (vec_all_ge (*a, *b)) + *p++ = 3; + + return p; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-5.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-5.c new file mode 100644 index 0000000..5c24dc6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-5.c @@ -0,0 +1,14 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ +/* { dg-final { scan-assembler "xxpermdi" } } */ +/* { dg-final { scan-assembler-not "stxvd2x" } } */ + +/* Make sure double extract doesn't use a store instruction. */ + +double d0(__vector double v){ return __builtin_vec_extract (v, 0); } +double d1(__vector double v){ return __builtin_vec_extract (v, 1); } + +double e0(vector double v){ return __builtin_vec_ext_v2df (v, 0); } +double e1(vector double v){ return __builtin_vec_ext_v2df (v, 1); } diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-6.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-6.c new file mode 100644 index 0000000..a722b83 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-6.c @@ -0,0 +1,146 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ + +/* Check whether tdiv and tsqrt instructions generate the correct code. */ +/* Each of the *tdiv* and *tsqrt* instructions should be generated exactly 3 + times (the two calls in the _1 function should be combined). 
*/ +/* { dg-final { scan-assembler-times "xstdivdp" 3 } } */ +/* { dg-final { scan-assembler-times "xvtdivdp" 3 } } */ +/* { dg-final { scan-assembler-times "xvtdivsp" 3 } } */ +/* { dg-final { scan-assembler-times "xstsqrtdp" 3 } } */ +/* { dg-final { scan-assembler-times "xvtsqrtdp" 3 } } */ +/* { dg-final { scan-assembler-times "xvtsqrtsp" 3 } } */ + +void test_div_df_1 (double a, double b, int *p) +{ + p[0] = __builtin_vsx_xstdivdp_fe (a, b); + p[1] = __builtin_vsx_xstdivdp_fg (a, b); +} + +int *test_div_df_2 (double a, double b, int *p) +{ + if (__builtin_vsx_xstdivdp_fe (a, b)) + *p++ = 1; + + return p; +} + +int *test_div_df_3 (double a, double b, int *p) +{ + if (__builtin_vsx_xstdivdp_fg (a, b)) + *p++ = 1; + + return p; +} + +void test_sqrt_df_1 (double a, int *p) +{ + p[0] = __builtin_vsx_xstsqrtdp_fe (a); + p[1] = __builtin_vsx_xstsqrtdp_fg (a); +} + +int *test_sqrt_df_2 (double a, int *p) +{ + if (__builtin_vsx_xstsqrtdp_fe (a)) + *p++ = 1; + + return p; +} + +int *test_sqrt_df_3 (double a, int *p) +{ + if (__builtin_vsx_xstsqrtdp_fg (a)) + *p++ = 1; + + return p; +} + +void test_div_v2df_1 (__vector double *a, __vector double *b, int *p) +{ + p[0] = __builtin_vsx_xvtdivdp_fe (*a, *b); + p[1] = __builtin_vsx_xvtdivdp_fg (*a, *b); +} + +int *test_div_v2df_2 (__vector double *a, __vector double *b, int *p) +{ + if (__builtin_vsx_xvtdivdp_fe (*a, *b)) + *p++ = 1; + + return p; +} + +int *test_div_v2df_3 (__vector double *a, __vector double *b, int *p) +{ + if (__builtin_vsx_xvtdivdp_fg (*a, *b)) + *p++ = 1; + + return p; +} + +void test_sqrt_v2df_1 (__vector double *a, int *p) +{ + p[0] = __builtin_vsx_xvtsqrtdp_fe (*a); + p[1] = __builtin_vsx_xvtsqrtdp_fg (*a); +} + +int *test_sqrt_v2df_2 (__vector double *a, int *p) +{ + if (__builtin_vsx_xvtsqrtdp_fe (*a)) + *p++ = 1; + + return p; +} + +int *test_sqrt_v2df_3 (__vector double *a, int *p) +{ + if (__builtin_vsx_xvtsqrtdp_fg (*a)) + *p++ = 1; + + return p; +} + +void test_div_v4sf_1 (__vector float *a, 
__vector float *b, int *p) +{ + p[0] = __builtin_vsx_xvtdivsp_fe (*a, *b); + p[1] = __builtin_vsx_xvtdivsp_fg (*a, *b); +} + +int *test_div_v4sf_2 (__vector float *a, __vector float *b, int *p) +{ + if (__builtin_vsx_xvtdivsp_fe (*a, *b)) + *p++ = 1; + + return p; +} + +int *test_div_v4sf_3 (__vector float *a, __vector float *b, int *p) +{ + if (__builtin_vsx_xvtdivsp_fg (*a, *b)) + *p++ = 1; + + return p; +} + +void test_sqrt_v4sf_1 (__vector float *a, int *p) +{ + p[0] = __builtin_vsx_xvtsqrtsp_fe (*a); + p[1] = __builtin_vsx_xvtsqrtsp_fg (*a); +} + +int *test_sqrt_v4sf_2 (__vector float *a, int *p) +{ + if (__builtin_vsx_xvtsqrtsp_fe (*a)) + *p++ = 1; + + return p; +} + +int *test_sqrt_v4sf_3 (__vector float *a, int *p) +{ + if (__builtin_vsx_xvtsqrtsp_fg (*a)) + *p++ = 1; + + return p; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c new file mode 100644 index 0000000..55e999d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c @@ -0,0 +1,150 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ + +/* Test simple extract/insert/splat operations. Make sure all types are + supported with various options.
*/ + +#include <altivec.h> + +double extract_df_0_reg (vector double p) { return vec_extract (p, 0); } +double extract_df_1_reg (vector double p) { return vec_extract (p, 1); } +double extract_df_n_reg (vector double p, int n) { return vec_extract (p, n); } + +double extract_df_0_mem (vector double *p) { return vec_extract (*p, 0); } +double extract_df_1_mem (vector double *p) { return vec_extract (*p, 1); } +double extract_df_n_mem (vector double *p, int n) { return vec_extract (*p, n); } + +vector double insert_df_0 (vector double p, double x) { return vec_insert (x, p, 0); } +vector double insert_df_1 (vector double p, double x) { return vec_insert (x, p, 1); } +vector double insert_df_n (vector double p, double x, int n) { return vec_insert (x, p, n); } + +vector double splat_df_reg (double x) { return vec_splats (x); } +vector double splat_df_mem (double *x) { return vec_splats (*x); } + +#ifdef _ARCH_PPC64 +#define ll long +#else +#define ll long long +#endif + +ll extract_di_0_reg (vector ll p) { return vec_extract (p, 0); } +ll extract_di_1_reg (vector ll p) { return vec_extract (p, 1); } +ll extract_di_n_reg (vector ll p, int n) { return vec_extract (p, n); } + +ll extract_di_0_mem (vector ll *p) { return vec_extract (*p, 0); } +ll extract_di_1_mem (vector ll *p) { return vec_extract (*p, 1); } +ll extract_di_n_mem (vector ll *p, int n) { return vec_extract (*p, n); } + +vector ll insert_di_0 (vector ll p, ll x) { return vec_insert (x, p, 0); } +vector ll insert_di_1 (vector ll p, ll x) { return vec_insert (x, p, 1); } +vector ll insert_di_n (vector ll p, ll x, int n) { return vec_insert (x, p, n); } + +vector ll splat_di_reg (ll x) { return vec_splats (x); } +vector ll splat_di_mem (ll *x) { return vec_splats (*x); } + +float extract_sf_0_reg (vector float p) { return vec_extract (p, 0); } +float extract_sf_3_reg (vector float p) { return vec_extract (p, 3); } +float extract_sf_n_reg (vector float p, int n) { return vec_extract (p, n); } + +float extract_sf_0_mem
(vector float *p) { return vec_extract (*p, 0); } +float extract_sf_3_mem (vector float *p) { return vec_extract (*p, 3); } +float extract_sf_n_mem (vector float *p, int n) { return vec_extract (*p, n); } + +vector float insert_sf_0 (vector float p, float x) { return vec_insert (x, p, 0); } +vector float insert_sf_3 (vector float p, float x) { return vec_insert (x, p, 3); } +vector float insert_sf_n (vector float p, float x, int n) { return vec_insert (x, p, n); } + +vector float splat_sf_reg (float x) { return vec_splats (x); } +vector float splat_sf_mem (float *x) { return vec_splats (*x); } + +int extract_si_0_reg (vector int p) { return vec_extract (p, 0); } +int extract_si_3_reg (vector int p) { return vec_extract (p, 3); } +int extract_si_n_reg (vector int p, int n) { return vec_extract (p, n); } + +int extract_si_0_mem (vector int *p) { return vec_extract (*p, 0); } +int extract_si_3_mem (vector int *p) { return vec_extract (*p, 3); } +int extract_si_n_mem (vector int *p, int n) { return vec_extract (*p, n); } + +vector int insert_si_0 (vector int p, int x) { return vec_insert (x, p, 0); } +vector int insert_si_3 (vector int p, int x) { return vec_insert (x, p, 3); } +vector int insert_si_n (vector int p, int x, int n) { return vec_insert (x, p, n); } + +vector int splat_si_reg (int x) { return vec_splats (x); } +vector int splat_si_mem (int *x) { return vec_splats (*x); } + +unsigned int extract_usi_0_reg (vector unsigned int p) { return vec_extract (p, 0); } +unsigned int extract_usi_3_reg (vector unsigned int p) { return vec_extract (p, 3); } +unsigned int extract_usi_n_reg (vector unsigned int p, int n) { return vec_extract (p, n); } + +unsigned int extract_usi_0_mem (vector unsigned int *p) { return vec_extract (*p, 0); } +unsigned int extract_usi_3_mem (vector unsigned int *p) { return vec_extract (*p, 3); } +unsigned int extract_usi_n_mem (vector unsigned int *p, int n) { return vec_extract (*p, n); } + +vector unsigned int insert_usi_0 (vector 
unsigned int p, unsigned int x) { return vec_insert (x, p, 0); } +vector unsigned int insert_usi_3 (vector unsigned int p, unsigned int x) { return vec_insert (x, p, 3); } +vector unsigned int insert_usi_n (vector unsigned int p, unsigned int x, int n) { return vec_insert (x, p, n); } + +vector unsigned int splat_usi_reg (unsigned int x) { return vec_splats (x); } +vector unsigned int splat_usi_mem (unsigned int *x) { return vec_splats (*x); } + +short extract_hi_0_reg (vector short p) { return vec_extract (p, 0); } +short extract_hi_7_reg (vector short p) { return vec_extract (p, 7); } +short extract_hi_n_reg (vector short p, int n) { return vec_extract (p, n); } + +short extract_hi_0_mem (vector short *p) { return vec_extract (*p, 0); } +short extract_hi_7_mem (vector short *p) { return vec_extract (*p, 7); } +short extract_hi_n_mem (vector short *p, int n) { return vec_extract (*p, n); } + +vector short insert_hi_0 (vector short p, short x) { return vec_insert (x, p, 0); } +vector short insert_hi_7 (vector short p, short x) { return vec_insert (x, p, 7); } +vector short insert_hi_n (vector short p, short x, int n) { return vec_insert (x, p, n); } + +vector short splat_hi_reg (short x) { return vec_splats (x); } +vector short splat_hi_mem (short *x) { return vec_splats (*x); } + +unsigned short extract_uhi_0_reg (vector unsigned short p) { return vec_extract (p, 0); } +unsigned short extract_uhi_7_reg (vector unsigned short p) { return vec_extract (p, 7); } +unsigned short extract_uhi_n_reg (vector unsigned short p, int n) { return vec_extract (p, n); } + +unsigned short extract_uhi_0_mem (vector unsigned short *p) { return vec_extract (*p, 0); } +unsigned short extract_uhi_7_mem (vector unsigned short *p) { return vec_extract (*p, 7); } +unsigned short extract_uhi_n_mem (vector unsigned short *p, int n) { return vec_extract (*p, n); } + +vector unsigned short insert_uhi_0 (vector unsigned short p, unsigned short x) { return vec_insert (x, p, 0); } +vector 
unsigned short insert_uhi_7 (vector unsigned short p, unsigned short x) { return vec_insert (x, p, 7); } +vector unsigned short insert_uhi_n (vector unsigned short p, unsigned short x, int n) { return vec_insert (x, p, n); } + +vector unsigned short splat_uhi_reg (unsigned short x) { return vec_splats (x); } +vector unsigned short splat_uhi_mem (unsigned short *x) { return vec_splats (*x); } + +signed char extract_qi_0_reg (vector signed char p) { return vec_extract (p, 0); } +signed char extract_qi_1_reg5 (vector signed char p) { return vec_extract (p, 15); } +signed char extract_qi_n_reg (vector signed char p, int n) { return vec_extract (p, n); } + +signed char extract_qi_0_mem (vector signed char *p) { return vec_extract (*p, 0); } +signed char extract_qi_1_mem5 (vector signed char *p) { return vec_extract (*p, 15); } +signed char extract_qi_n_mem (vector signed char *p, int n) { return vec_extract (*p, n); } + +vector signed char insert_qi_0 (vector signed char p, signed char x) { return vec_insert (x, p, 0); } +vector signed char insert_qi_15 (vector signed char p, signed char x) { return vec_insert (x, p, 15); } +vector signed char insert_qi_n (vector signed char p, signed char x, int n) { return vec_insert (x, p, n); } + +vector signed char splat_qi_reg (signed char x) { return vec_splats (x); } +vector signed char splat_qi_mem (signed char *x) { return vec_splats (*x); } + +unsigned char extract_uqi_0_reg (vector unsigned char p) { return vec_extract (p, 0); } +unsigned char extract_uqi_1_reg5 (vector unsigned char p) { return vec_extract (p, 15); } +unsigned char extract_uqi_n_reg (vector unsigned char p, int n) { return vec_extract (p, n); } + +unsigned char extract_uqi_0_mem (vector unsigned char *p) { return vec_extract (*p, 0); } +unsigned char extract_uqi_1_mem5 (vector unsigned char *p) { return vec_extract (*p, 15); } +unsigned char extract_uqi_n_mem (vector unsigned char *p, int n) { return vec_extract (*p, n); } + +vector unsigned char 
insert_uqi_0 (vector unsigned char p, unsigned char x) { return vec_insert (x, p, 0); } +vector unsigned char insert_uqi_15 (vector unsigned char p, unsigned char x) { return vec_insert (x, p, 15); } +vector unsigned char insert_uqi_n (vector unsigned char p, unsigned char x, int n) { return vec_insert (x, p, n); } + +vector unsigned char splat_uqi_reg (unsigned char x) { return vec_splats (x); } +vector unsigned char splat_uqi_mem (unsigned char *x) { return vec_splats (*x); } diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-1.c b/gcc/testsuite/gcc.target/powerpc/vsx-vector-1.c new file mode 100644 index 0000000..0bf3a7f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-1.c @@ -0,0 +1,152 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -ftree-vectorize -mcpu=power7 -m64 -ffast-math" } */ +/* { dg-final { scan-assembler "xvadddp" } } */ +/* { dg-final { scan-assembler "xvsubdp" } } */ +/* { dg-final { scan-assembler "xvmuldp" } } */ +/* { dg-final { scan-assembler "xvdivdp" } } */ +/* { dg-final { scan-assembler "xvmadd" } } */ +/* { dg-final { scan-assembler "xvmsub" } } */ +/* { dg-final { scan-assembler "xvsqrtdp" } } */ +/* { dg-final { scan-assembler "xvcpsgndp" } } */ +/* { dg-final { scan-assembler "xvrdpim" } } */ +/* { dg-final { scan-assembler "xvrdpip" } } */ +/* { dg-final { scan-assembler "xvrdpiz" } } */ +/* { dg-final { scan-assembler "xvrdpic" } } */ +/* { dg-final { scan-assembler "xvrdpi " } } */ + +#ifndef SIZE +#define SIZE 1024 +#endif + +double a[SIZE] __attribute__((__aligned__(32))); +double b[SIZE] __attribute__((__aligned__(32))); +double c[SIZE] __attribute__((__aligned__(32))); +double d[SIZE] __attribute__((__aligned__(32))); +double e[SIZE] __attribute__((__aligned__(32))); + +void +vector_add (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] + 
c[i]; +} + +void +vector_subtract (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] - c[i]; +} + +void +vector_multiply (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] * c[i]; +} + +void +vector_multiply_add (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = (b[i] * c[i]) + d[i]; +} + +void +vector_multiply_subtract (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = (b[i] * c[i]) - d[i]; +} + +void +vector_divide (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] / c[i]; +} + +extern double sqrt (double); +extern double floor (double); +extern double ceil (double); +extern double trunc (double); +extern double nearbyint (double); +extern double rint (double); +extern double copysign (double, double); + +void +vector_sqrt (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = sqrt (b[i]); +} + +void +vector_floor (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = floor (b[i]); +} + +void +vector_ceil (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = ceil (b[i]); +} + +void +vector_trunc (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = trunc (b[i]); +} + +void +vector_nearbyint (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = nearbyint (b[i]); +} + +void +vector_rint (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = rint (b[i]); +} + +void +vector_copysign (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = copysign (b[i], c[i]); +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-2.c b/gcc/testsuite/gcc.target/powerpc/vsx-vector-2.c new file mode 100644 index 0000000..ba27b46 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-2.c @@ -0,0 +1,152 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -ftree-vectorize -mcpu=power7 -m64 -ffast-math" } */ +/* { dg-final { scan-assembler 
"xvaddsp" } } */ +/* { dg-final { scan-assembler "xvsubsp" } } */ +/* { dg-final { scan-assembler "xvmulsp" } } */ +/* { dg-final { scan-assembler "xvdivsp" } } */ +/* { dg-final { scan-assembler "xvmadd" } } */ +/* { dg-final { scan-assembler "xvmsub" } } */ +/* { dg-final { scan-assembler "xvsqrtsp" } } */ +/* { dg-final { scan-assembler "xvcpsgnsp" } } */ +/* { dg-final { scan-assembler "xvrspim" } } */ +/* { dg-final { scan-assembler "xvrspip" } } */ +/* { dg-final { scan-assembler "xvrspiz" } } */ +/* { dg-final { scan-assembler "xvrspic" } } */ +/* { dg-final { scan-assembler "xvrspi " } } */ + +#ifndef SIZE +#define SIZE 1024 +#endif + +float a[SIZE] __attribute__((__aligned__(32))); +float b[SIZE] __attribute__((__aligned__(32))); +float c[SIZE] __attribute__((__aligned__(32))); +float d[SIZE] __attribute__((__aligned__(32))); +float e[SIZE] __attribute__((__aligned__(32))); + +void +vector_add (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] + c[i]; +} + +void +vector_subtract (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] - c[i]; +} + +void +vector_multiply (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] * c[i]; +} + +void +vector_multiply_add (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = (b[i] * c[i]) + d[i]; +} + +void +vector_multiply_subtract (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = (b[i] * c[i]) - d[i]; +} + +void +vector_divide (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = b[i] / c[i]; +} + +extern float sqrtf (float); +extern float floorf (float); +extern float ceilf (float); +extern float truncf (float); +extern float nearbyintf (float); +extern float rintf (float); +extern float copysignf (float, float); + +void +vector_sqrt (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = sqrtf (b[i]); +} + +void +vector_floor (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = floorf (b[i]); +} + +void +vector_ceil (void) +{ + int i; + + for (i = 0; i < 
SIZE; i++) + a[i] = ceilf (b[i]); +} + +void +vector_trunc (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = truncf (b[i]); +} + +void +vector_nearbyint (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = nearbyintf (b[i]); +} + +void +vector_rint (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = rintf (b[i]); +} + +void +vector_copysign (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + a[i] = copysignf (b[i], c[i]); +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-3.c b/gcc/testsuite/gcc.target/powerpc/vsx-vector-3.c new file mode 100644 index 0000000..5f3bf5b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-3.c @@ -0,0 +1,48 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -ftree-vectorize -mcpu=power7 -m64" } */ +/* { dg-final { scan-assembler "xvadddp" } } */ +/* { dg-final { scan-assembler "xvsubdp" } } */ +/* { dg-final { scan-assembler "xvmuldp" } } */ +/* { dg-final { scan-assembler "xvdivdp" } } */ +/* { dg-final { scan-assembler "xvmadd" } } */ +/* { dg-final { scan-assembler "xvmsub" } } */ + +__vector double a, b, c, d; + +void +vector_add (void) +{ + a = b + c; +} + +void +vector_subtract (void) +{ + a = b - c; +} + +void +vector_multiply (void) +{ + a = b * c; +} + +void +vector_multiply_add (void) +{ + a = (b * c) + d; +} + +void +vector_multiply_subtract (void) +{ + a = (b * c) - d; +} + +void +vector_divide (void) +{ + a = b / c; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-4.c b/gcc/testsuite/gcc.target/powerpc/vsx-vector-4.c new file mode 100644 index 0000000..a34ba8f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-4.c @@ -0,0 +1,48 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ 
+/* { dg-options "-O2 -ftree-vectorize -mcpu=power7 -m64" } */ +/* { dg-final { scan-assembler "xvaddsp" } } */ +/* { dg-final { scan-assembler "xvsubsp" } } */ +/* { dg-final { scan-assembler "xvmulsp" } } */ +/* { dg-final { scan-assembler "xvdivsp" } } */ +/* { dg-final { scan-assembler "xvmadd" } } */ +/* { dg-final { scan-assembler "xvmsub" } } */ + +__vector float a, b, c, d; + +void +vector_add (void) +{ + a = b + c; +} + +void +vector_subtract (void) +{ + a = b - c; +} + +void +vector_multiply (void) +{ + a = b * c; +} + +void +vector_multiply_add (void) +{ + a = (b * c) + d; +} + +void +vector_multiply_subtract (void) +{ + a = (b * c) - d; +} + +void +vector_divide (void) +{ + a = b / c; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-5.c b/gcc/testsuite/gcc.target/powerpc/vsx-vector-5.c new file mode 100644 index 0000000..65843e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-5.c @@ -0,0 +1,392 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-mvsx -O2" } */ + +/* This will run, and someday we should add the support to test whether we are + running on VSX hardware. 
*/ + +#include <altivec.h> +#include <stdlib.h> + +#ifdef DEBUG +#include <stdio.h> + +static int errors = 0; +#endif + +union args { + double scalar[2]; + vector double vect; +}; + +union largs { + unsigned long scalar[2]; + vector bool long vect; +}; + +static void +do_test (union args *expected, union args *got, const char *name) +{ + if (expected->scalar[0] != got->scalar[0] + || expected->scalar[1] != got->scalar[1]) + { +#ifdef DEBUG + printf ("%s failed!\n", name); + errors++; +#else + abort (); +#endif + } +} + +static void +do_ltest (union largs *expected, union largs *got, const char *name) +{ + if (expected->scalar[0] != got->scalar[0] + || expected->scalar[1] != got->scalar[1]) + { +#ifdef DEBUG + printf ("%s failed!\n", name); + errors++; +#else + abort (); +#endif + } +} + + +/* Vec functions taking a single argument. */ +static vector double +vabs (vector double arg) +{ + return vec_abs (arg); +} + +static vector double +vceil (vector double arg) +{ + return vec_ceil (arg); +} + +static vector double +vfloor (vector double arg) +{ + return vec_floor (arg); +} + +static vector double +vnearbyint (vector double arg) +{ + return vec_nearbyint (arg); +} + +static vector double +vrint (vector double arg) +{ + return vec_rint (arg); +} + +static vector double +vsqrt (vector double arg) +{ + return vec_sqrt (arg); +} + +/* Single argument tests.
*/
+static struct
+{
+  union args result;  /* Expected value.  */
+  union args input;  /* Argument passed to func.  */
+  vector double (*func) (vector double);  /* Wrapper under test.  */
+  const char *name;  /* Label used in failure and DEBUG messages.  */
+} arg1_tests[] = {
+  /* result, input, function, name */
+  { {  1.0,  2.0 }, { -1.0,  2.0 }, vabs,       "vabs" },
+  { {  1.0,  2.0 }, {  1.0, -2.0 }, vabs,       "vabs" },
+  { {  2.0,  2.0 }, {  1.1,  1.7 }, vceil,      "vceil" },
+  { { -1.0, -1.0 }, { -1.1, -1.7 }, vceil,      "vceil" },
+  { { -1.0,  2.0 }, { -1.5,  1.5 }, vceil,      "vceil" },
+  { {  1.0,  1.0 }, {  1.1,  1.7 }, vfloor,     "vfloor" },
+  { { -2.0, -2.0 }, { -1.1, -1.7 }, vfloor,     "vfloor" },
+  { { -2.0,  1.0 }, { -1.5,  1.5 }, vfloor,     "vfloor" },
+  { {  1.0,  2.0 }, {  1.1,  1.7 }, vnearbyint, "vnearbyint" },
+  { { -1.0, -2.0 }, { -1.1, -1.7 }, vnearbyint, "vnearbyint" },
+  { { -2.0,  2.0 }, { -1.5,  1.5 }, vnearbyint, "vnearbyint" },
+  { {  1.0,  2.0 }, {  1.1,  1.7 }, vrint,      "vrint" },
+  { { -1.0, -2.0 }, { -1.1, -1.7 }, vrint,      "vrint" },
+  { { -2.0,  2.0 }, { -1.5,  1.5 }, vrint,      "vrint" },
+  { {  2.0,  4.0 }, {  4.0, 16.0 }, vsqrt,      "vsqrt" },
+};
+/* Call every arg1_tests entry and check its result with do_test.  */
+static void
+test_arg1 (void)
+{
+  unsigned i;
+
+#ifdef DEBUG
+  printf ("Single argument tests:\n");
+#endif
+
+  for (i = 0; i < sizeof (arg1_tests) / sizeof (arg1_tests[0]); i++)
+    {
+      union args u;  /* Receives the vector result so its elements can be compared.  */
+      u.vect = arg1_tests[i].func (arg1_tests[i].input.vect);
+
+#ifdef DEBUG
+      printf ("test %-16s: expected { %4g, %4g }, got { %4g, %4g }, input { %4g, %4g }\n",
+              arg1_tests[i].name,
+              arg1_tests[i].result.scalar[0],
+              arg1_tests[i].result.scalar[1],
+              u.scalar[0],
+              u.scalar[1],
+              arg1_tests[i].input.scalar[0],
+              arg1_tests[i].input.scalar[1]);
+#endif
+
+      do_test (&arg1_tests[i].result, &u, arg1_tests[i].name);
+    }
+
+  return;
+}
+
+
+/* Vect functions taking 2 arguments.
*/
+static vector double
+vadd (vector double arg1, vector double arg2)
+{
+  return vec_add (arg1, arg2);
+}
+
+static vector double
+vadd2 (vector double arg1, vector double arg2)
+{
+  return arg1 + arg2;  /* Same operation as vadd, written with the + operator.  */
+}
+
+static vector double
+vsub (vector double arg1, vector double arg2)
+{
+  return vec_sub (arg1, arg2);
+}
+
+static vector double
+vsub2 (vector double arg1, vector double arg2)
+{
+  return arg1 - arg2;  /* Same operation as vsub, written with the - operator.  */
+}
+
+static vector double
+vmul (vector double arg1, vector double arg2)
+{
+  return vec_mul (arg1, arg2);
+}
+
+static vector double
+vmul2 (vector double arg1, vector double arg2)
+{
+  return arg1 * arg2;  /* Same operation as vmul, written with the * operator.  */
+}
+
+static vector double
+vdiv (vector double arg1, vector double arg2)
+{
+  return vec_div (arg1, arg2);
+}
+
+static vector double
+vdiv2 (vector double arg1, vector double arg2)
+{
+  return arg1 / arg2;  /* Same operation as vdiv, written with the / operator.  */
+}
+
+static vector double
+vmax (vector double arg1, vector double arg2)
+{
+  return vec_max (arg1, arg2);
+}
+
+static vector double
+vmin (vector double arg1, vector double arg2)
+{
+  return vec_min (arg1, arg2);
+}
+
+/* 2 argument tests.
*/ +static struct +{ + union args result; + union args input[2]; + vector double (*func) (vector double, vector double); + const char *name; +} arg2_tests[] = { + /* result */ + { { 4.0, 6.0 }, { { 1.0, 2.0 }, { 3.0, 4.0 } }, vadd, "vadd" }, + { { 4.0, -6.0 }, { { 1.0, -2.0 }, { 3.0, -4.0 } }, vadd, "vadd" }, + { { 4.0, 6.0 }, { { 1.0, 2.0 }, { 3.0, 4.0 } }, vadd2, "vadd2" }, + { { 4.0, -6.0 }, { { 1.0, -2.0 }, { 3.0, -4.0 } }, vadd2, "vadd2" }, + { { -2.0, -2.0 }, { { 1.0, 2.0 }, { 3.0, 4.0 } }, vsub, "vsub" }, + { { -2.0, 2.0 }, { { 1.0, -2.0 }, { 3.0, -4.0 } }, vsub, "vsub" }, + { { -2.0, -2.0 }, { { 1.0, 2.0 }, { 3.0, 4.0 } }, vsub2, "vsub2" }, + { { -2.0, 2.0 }, { { 1.0, -2.0 }, { 3.0, -4.0 } }, vsub2, "vsub2" }, + { { 6.0, 4.0 }, { { 2.0, 8.0 }, { 3.0, 0.5 } }, vmul, "vmul" }, + { { 6.0, 4.0 }, { { 2.0, 8.0 }, { 3.0, 0.5 } }, vmul2, "vmul2" }, + { { 2.0, 0.5 }, { { 6.0, 4.0 }, { 3.0, 8.0 } }, vdiv, "vdiv" }, + { { 2.0, 0.5 }, { { 6.0, 4.0 }, { 3.0, 8.0 } }, vdiv2, "vdiv2" }, + { { 3.0, 4.0 }, { { 1.0, 2.0 }, { 3.0, 4.0 } }, vmax, "vmax" }, + { { 1.0, 4.0 }, { { 1.0, -2.0 }, { -3.0, 4.0 } }, vmax, "vmax" }, + { { 1.0, 2.0 }, { { 1.0, 2.0 }, { 3.0, 4.0 } }, vmin, "vmin" }, + { { -3.0, -2.0 }, { { 1.0, -2.0 }, { -3.0, 4.0 } }, vmin, "vmin" }, +}; + +static void +test_arg2 (void) +{ + unsigned i; + +#ifdef DEBUG + printf ("\nTwo argument tests:\n"); +#endif + + for (i = 0; i < sizeof (arg2_tests) / sizeof (arg2_tests[0]); i++) + { + union args u; + u.vect = arg2_tests[i].func (arg2_tests[i].input[0].vect, + arg2_tests[i].input[1].vect); + +#ifdef DEBUG + printf ("test %-16s: expected { %4g, %4g }, got { %4g, %4g }, input { %4g, %4g }, { %4g, %4g }\n", + arg2_tests[i].name, + arg2_tests[i].result.scalar[0], + arg2_tests[i].result.scalar[1], + u.scalar[0], + u.scalar[1], + arg2_tests[i].input[0].scalar[0], + arg2_tests[i].input[0].scalar[1], + arg2_tests[i].input[1].scalar[0], + arg2_tests[i].input[1].scalar[1]); +#endif + + do_test (&arg2_tests[i].result, &u, 
arg2_tests[i].name); + } + + return; +} + + +/* Comparisons, returnning a boolean vector. */ +static vector bool long +vcmpeq (vector double arg1, vector double arg2) +{ + return vec_cmpeq (arg1, arg2); +} + +static vector bool long +vcmplt (vector double arg1, vector double arg2) +{ + return vec_cmplt (arg1, arg2); +} + +static vector bool long +vcmple (vector double arg1, vector double arg2) +{ + return vec_cmple (arg1, arg2); +} + +static vector bool long +vcmpgt (vector double arg1, vector double arg2) +{ + return vec_cmpgt (arg1, arg2); +} + +static vector bool long +vcmpge (vector double arg1, vector double arg2) +{ + return vec_cmpge (arg1, arg2); +} + +#define ONE 0xffffffffffffffffUL +#define ZERO 0x0000000000000000UL + +/* comparison tests. */ +static struct +{ + union largs result; + union args input[2]; + vector bool long (*func) (vector double, vector double); + const char *name; +} argcmp_tests[] = { + { { ONE, ZERO }, { { 1.0, 2.0 }, { 1.0, -2.0 } }, vcmpeq, "vcmpeq" }, + { { ZERO, ONE }, { { -1.0, 2.0 }, { 1.0, 2.0 } }, vcmpeq, "vcmpeq" }, + + { { ONE, ONE }, { { 1.0, -2.0 }, { 1.0, -2.0 } }, vcmple, "vcmple" }, + { { ONE, ONE }, { { 1.0, -2.0 }, { 2.0, -1.0 } }, vcmple, "vcmple" }, + { { ZERO, ZERO }, { { 2.0, -1.0 }, { 1.0, -2.0 } }, vcmple, "vcmple" }, + + { { ZERO, ZERO }, { { 1.0, -2.0 }, { 1.0, -2.0 } }, vcmplt, "vcmplt" }, + { { ONE, ONE }, { { 1.0, -2.0 }, { 2.0, -1.0 } }, vcmplt, "vcmplt" }, + { { ZERO, ZERO }, { { 2.0, -1.0 }, { 1.0, -2.0 } }, vcmplt, "vcmplt" }, + + { { ZERO, ZERO }, { { 1.0, -2.0 }, { 1.0, -2.0 } }, vcmpgt, "vcmpgt" }, + { { ZERO, ZERO }, { { 1.0, -2.0 }, { 2.0, -1.0 } }, vcmpgt, "vcmpgt" }, + { { ONE, ONE }, { { 2.0, -1.0 }, { 1.0, -2.0 } }, vcmpgt, "vcmpgt" }, + + { { ONE, ONE }, { { 1.0, -2.0 }, { 1.0, -2.0 } }, vcmpge, "vcmpge" }, + { { ZERO, ZERO }, { { 1.0, -2.0 }, { 2.0, -1.0 } }, vcmpge, "vcmpge" }, + { { ONE, ONE }, { { 2.0, -1.0 }, { 1.0, -2.0 } }, vcmpge, "vcmpge" }, +}; + +static void +test_argcmp (void) +{ + 
unsigned i; + +#ifdef DEBUG + printf ("\nComparison tests:\n"); +#endif + + for (i = 0; i < sizeof (argcmp_tests) / sizeof (argcmp_tests[0]); i++) + { + union largs u; + u.vect = argcmp_tests[i].func (argcmp_tests[i].input[0].vect, + argcmp_tests[i].input[1].vect); + +#ifdef DEBUG + printf ("test %-16s: expected { 0x%016lx, 0x%016lx }, got { 0x%016lx, 0x%016lx }, input { %4g, %4g }, { %4g, %4g }\n", + argcmp_tests[i].name, + argcmp_tests[i].result.scalar[0], + argcmp_tests[i].result.scalar[1], + u.scalar[0], + u.scalar[1], + argcmp_tests[i].input[0].scalar[0], + argcmp_tests[i].input[0].scalar[1], + argcmp_tests[i].input[1].scalar[0], + argcmp_tests[i].input[1].scalar[1]); +#endif + + do_ltest (&argcmp_tests[i].result, &u, argcmp_tests[i].name); + } + + return; +} + + +int +main (int argc, char *argv[]) +{ + test_arg1 (); + test_arg2 (); + test_argcmp (); + +#ifdef DEBUG + if (errors) + { + printf ("There were %d error(s)\n", errors); + return errors; + } + else + printf ("There were no errors\n"); +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.c b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.c new file mode 100644 index 0000000..f8e644b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.c @@ -0,0 +1,81 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-mvsx -O2" } */ + +#include + +void foo (vector double *out, vector double *in, vector long *p_l, vector bool long *p_b, vector unsigned char *p_uc, int *i) +{ + vector double in0 = in[0]; + vector double in1 = in[1]; + vector double in2 = in[2]; + vector long inl = *p_l; + vector bool long inb = *p_b; + vector unsigned char uc = *p_uc; + + *out++ = vec_abs (in0); + *out++ = vec_add (in0, in1); + *out++ = vec_and (in0, in1); + *out++ = vec_and (in0, inb); + *out++ = vec_and (inb, in0); + *out++ = vec_andc (in0, in1); + 
*out++ = vec_andc (in0, inb); + *out++ = vec_andc (inb, in0); + *out++ = vec_ceil (in0); + *p_b++ = vec_cmpeq (in0, in1); + *p_b++ = vec_cmpgt (in0, in1); + *p_b++ = vec_cmpge (in0, in1); + *p_b++ = vec_cmplt (in0, in1); + *p_b++ = vec_cmple (in0, in1); + *out++ = vec_div (in0, in1); + *out++ = vec_floor (in0); + *out++ = vec_madd (in0, in1, in2); + *out++ = vec_msub (in0, in1, in2); + *out++ = vec_max (in0, in1); + *out++ = vec_min (in0, in1); + *out++ = vec_msub (in0, in1, in2); + *out++ = vec_mul (in0, in1); + *out++ = vec_nearbyint (in0); + *out++ = vec_nmadd (in0, in1, in2); + *out++ = vec_nmsub (in0, in1, in2); + *out++ = vec_nor (in0, in1); + *out++ = vec_or (in0, in1); + *out++ = vec_or (in0, inb); + *out++ = vec_or (inb, in0); + *out++ = vec_perm (in0, in1, uc); + *out++ = vec_rint (in0); + *out++ = vec_sel (in0, in1, inl); + *out++ = vec_sel (in0, in1, inb); + *out++ = vec_sub (in0, in1); + *out++ = vec_sqrt (in0); + *out++ = vec_trunc (in0); + *out++ = vec_xor (in0, in1); + *out++ = vec_xor (in0, inb); + *out++ = vec_xor (inb, in0); + + *i++ = vec_all_eq (in0, in1); + *i++ = vec_all_ge (in0, in1); + *i++ = vec_all_gt (in0, in1); + *i++ = vec_all_le (in0, in1); + *i++ = vec_all_lt (in0, in1); + *i++ = vec_all_nan (in0); + *i++ = vec_all_ne (in0, in1); + *i++ = vec_all_nge (in0, in1); + *i++ = vec_all_ngt (in0, in1); + *i++ = vec_all_nle (in0, in1); + *i++ = vec_all_nlt (in0, in1); + *i++ = vec_all_numeric (in0); + *i++ = vec_any_eq (in0, in1); + *i++ = vec_any_ge (in0, in1); + *i++ = vec_any_gt (in0, in1); + *i++ = vec_any_le (in0, in1); + *i++ = vec_any_lt (in0, in1); + *i++ = vec_any_nan (in0); + *i++ = vec_any_ne (in0, in1); + *i++ = vec_any_nge (in0, in1); + *i++ = vec_any_ngt (in0, in1); + *i++ = vec_any_nle (in0, in1); + *i++ = vec_any_nlt (in0, in1); + *i++ = vec_any_numeric (in0); +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index c847de0..050292b 100644 --- a/gcc/testsuite/lib/target-supports.exp 
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -902,6 +902,32 @@ proc check_sse2_hw_available { } {
     }]
 }
 
+# Return 1 if the target supports executing VSX instructions, 0
+# otherwise.  Cache the result.
+
+proc check_vsx_hw_available { } {
+    return [check_cached_effective_target vsx_hw_available {
+        # Some simulators are known to not support VSX instructions.
+        # For now, disable on Darwin.
+        if { [istarget powerpc-*-eabi] || [istarget powerpc*-*-eabispe] || [istarget *-*-darwin*]} {
+            expr 0
+        } else {
+            set options "-mvsx"
+            check_runtime_nocache vsx_hw_available {
+                int main()
+                {
+                #ifdef __MACH__
+                  asm volatile ("xxlor vs0,vs0,vs0");
+                #else
+                  asm volatile ("xxlor 0,0,0");
+                #endif
+                  return 0;
+                }
+            } $options
+        }
+    }]
+}
+
 # Return 1 if the target supports executing AltiVec instructions, 0
 # otherwise.  Cache the result.
 
@@ -912,12 +938,13 @@ proc check_vmx_hw_available { } {
             expr 0
         } else {
             # Most targets don't require special flags for this test case, but
-            # Darwin does.
+            # Darwin does.  Just to be sure, make sure VSX is not enabled for
+            # the altivec tests.
             if { [istarget *-*-darwin*]
                  || [istarget *-*-aix*] } {
-                set options "-maltivec"
+                set options "-maltivec -mno-vsx"
             } else {
-                set options ""
+                set options "-mno-vsx"
             }
             check_runtime_nocache vmx_hw_available {
                 int main()
@@ -1632,6 +1659,33 @@ proc check_effective_target_powerpc_altivec_ok { } {
     }
 }
 
+# Return 1 if this is a PowerPC target supporting -mvsx, 0 otherwise.
+
+proc check_effective_target_powerpc_vsx_ok { } {
+    if { ([istarget powerpc*-*-*]
+         && ![istarget powerpc-*-linux*paired*])
+         || [istarget rs6000-*-*] } {
+        # AltiVec is not supported on AIX before 5.3.  NOTE(review): looks copied from the AltiVec check; confirm this cutoff for VSX.
+        if { [istarget powerpc*-*-aix4*]
+             || [istarget powerpc*-*-aix5.1*]
+             || [istarget powerpc*-*-aix5.2*] } {
+            return 0
+        }
+        return [check_no_compiler_messages powerpc_vsx_ok object {
+            int main (void) {
+#ifdef __MACH__
+                asm volatile ("xxlor vs0,vs0,vs0");
+#else
+                asm volatile ("xxlor 0,0,0");
+#endif
+                return 0;
+            }
+        } "-mvsx"]
+    } else {
+        return 0
+    }
+}
+
 # Return 1 if this is a PowerPC target supporting -mcpu=cell.
 
 proc check_effective_target_powerpc_ppu_ok { } {
-- 
2.7.4