From: uweigand Date: Fri, 13 Jul 2007 18:31:08 +0000 (+0000) Subject: 2007-07-13 Sa Liu X-Git-Tag: upstream/4.9.2~47463 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5474166e273d10ef8a339801e6bcdf9882c3ab81;p=platform%2Fupstream%2Flinaro-gcc.git 2007-07-13 Sa Liu * config.gcc: Add options for arch and tune on SPU. * config/spu/predicates.md: Add constant operands 0 and 1. * config/spu/spu-builtins.def: Add builtins for double precision floating point comparison: si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv, spu_cmpeq_13, spu_cmpabseq_1, spu_cmpgt_13, spu_cmpabsgt_1, spu_testsv. * config/spu/spu-c.c: Define __SPU_EDP__ when builtins invoked with a CELLEDP target. * config/spu/spu-protos.h: Add new function prototypes. * config/spu/spu.c (spu_override_options): Check options -march and -mtune. (spu_comp_icode): Add comparison code for DFmode and vector mode. (spu_emit_branch_or_set): Use the new code for DFmode and vector mode comparison. (spu_const_from_int): New. Create a vector constant from 4 ints. (get_vec_cmp_insn): New. Get insn index of vector compare instruction. (spu_emit_vector_compare): New. Emit vector compare. (spu_emit_vector_cond_expr): New. Emit vector conditional expression. * config/spu/spu.h: Add options -march and -mtune. Define processor types PROCESSOR_CELL and PROCESSOR_CELLEDP. Define macro CANONICALIZE_COMPARISON. * config/spu/spu.md: Add new insns for double precision compare and double precision vector compare. Add vcond and smax/smin patterns to enable DFmode vector conditional expression. * config/spu/spu.opt: Add options -march and -mtune. * config/spu/spu_internals.h: Add builtins for CELLEDP target: si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv. Add builtin for both CELL and CELLEDP targets: spu_testsv. * config/spu/spu_intrinsics.h: Add flag mnemonics for test special values. testsuite/ * gcc.dg/vect/fast-math-vect-reduc-7.c: Switch on test for V2DFmode vector conditional expression. * gcc.target/spu/dfcmeq.c: New. Test combination of abs and dfceq patterns. * gcc.target/spu/dfcmgt.c: New. Test combination of abs and dfcgt patterns. * gcc.target/spu/intrinsics-2.c: New. Test intrinsics for V2DFmode comparison and test special values. * lib/target-supports.exp: Switch on test for V2DFmode vector conditional expression. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@126626 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5c8b8be..6aeca7e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,36 @@ +2007-07-13 Sa Liu + + * config.gcc: Add options for arch and tune on SPU. + * config/spu/predicates.md: Add constant operands 0 and 1. + * config/spu/spu-builtins.def: Add builtins for double precision + floating point comparison: si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, + si_dftsv, spu_cmpeq_13, spu_cmpabseq_1, spu_cmpgt_13, spu_cmpabsgt_1, + spu_testsv. + * config/spu/spu-c.c: Define __SPU_EDP__ when builtins invoked with + a CELLEDP target. + * config/spu/spu-protos.h: Add new function prototypes. + * config/spu/spu.c (spu_override_options): Check options -march and + -mtune. + (spu_comp_icode): Add comparison code for DFmode and vector mode. + (spu_emit_branch_or_set): Use the new code for DFmode and vector + mode comparison. + (spu_const_from_int): New. Create a vector constant from 4 ints. + (get_vec_cmp_insn): New. Get insn index of vector compare instruction. + (spu_emit_vector_compare): New. Emit vector compare. + (spu_emit_vector_cond_expr): New. Emit vector conditional expression. 
+ * config/spu/spu.h: Add options -march and -mtune. Define processor + types PROCESSOR_CELL and PROCESSOR_CELLEDP. Define macro + CANONICALIZE_COMPARISON. + * config/spu/spu.md: Add new insns for double precision compare + and double precision vector compare. Add vcond and smax/smin patterns + to enable DFmode vector conditional expression. + * config/spu/spu.opt: Add options -march and -mtune. + * config/spu/spu_internals.h: Add builtins for CELLEDP target: + si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv. Add builtin for + both CELL and CELLEDP targets: spu_testsv. + * config/spu/spu_intrinsics.h: Add flag mnemonics for test special + values. + 2007-07-13 Richard Guenther PR tree-optimization/32721 diff --git a/gcc/config.gcc b/gcc/config.gcc index 085222e..f13d7db 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -3142,6 +3142,23 @@ case "${target}" in esac ;; + spu-*-*) + supported_defaults="arch tune" + + for which in arch tune; do + eval "val=\$with_$which" + case ${val} in + "" | cell | celledp) + # OK + ;; + *) + echo "Unknown cpu used in --with-$which=$val." 1>&2 + exit 1 + ;; + esac + done + ;; + v850*-*-*) supported_defaults=cpu case ${with_cpu} in diff --git a/gcc/config/spu/predicates.md b/gcc/config/spu/predicates.md index 8b31e65..74659c3 100644 --- a/gcc/config/spu/predicates.md +++ b/gcc/config/spu/predicates.md @@ -16,6 +16,15 @@ ;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA ;; 02110-1301, USA. +;; Return 1 if operand is constant zero of its mode +(define_predicate "const_zero_operand" + (and (match_code "const_int,const,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +(define_predicate "const_one_operand" + (and (match_code "const_int,const,const_double,const_vector") + (match_test "op == CONST1_RTX (mode)"))) + (define_predicate "spu_reg_operand" (and (match_operand 0 "register_operand") (ior (not (match_code "subreg")) diff --git a/gcc/config/spu/spu-builtins.def b/gcc/config/spu/spu-builtins.def index 6ae382f..9ab1b5d 100644 --- a/gcc/config/spu/spu-builtins.def +++ b/gcc/config/spu/spu-builtins.def @@ -189,9 +189,14 @@ DEF_BUILTIN (SI_CFLTU, CODE_FOR_spu_cfltu, "si_cfltu", B_INSN, DEF_BUILTIN (SI_FRDS, CODE_FOR_spu_frds, "si_frds", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_FESD, CODE_FOR_spu_fesd, "si_fesd", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_FCEQ, CODE_FOR_ceq_v4sf, "si_fceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFCEQ, CODE_FOR_ceq_v2df, "si_dfceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_FCMEQ, CODE_FOR_cmeq_v4sf, "si_fcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFCMEQ, CODE_FOR_cmeq_v2df, "si_dfcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_FCGT, CODE_FOR_cgt_v4sf, "si_fcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFCGT, CODE_FOR_cgt_v2df, "si_dfcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_FCMGT, CODE_FOR_cmgt_v4sf, "si_fcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFCMGT, CODE_FOR_cmgt_v2df, "si_dfcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFTSV, CODE_FOR_dftsv, "si_dftsv", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7)) DEF_BUILTIN (SI_STOP, CODE_FOR_spu_stop, "si_stop", B_INSN, 
_A2(SPU_BTI_VOID, SPU_BTI_U14)) DEF_BUILTIN (SI_STOPD, CODE_FOR_spu_stopd, "si_stopd", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_LNOP, CODE_FOR_lnop, "si_lnop", B_INSN, _A1(SPU_BTI_VOID)) @@ -245,11 +250,10 @@ DEF_BUILTIN (SPU_SUMB, CODE_FOR_spu_sumb, "spu_sumb", B_INSN, DEF_BUILTIN (SPU_BISLED, CODE_FOR_spu_bisled, "spu_bisled", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR)) DEF_BUILTIN (SPU_BISLED_D, CODE_FOR_spu_bisledd, "spu_bisled_d", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR)) DEF_BUILTIN (SPU_BISLED_E, CODE_FOR_spu_bislede, "spu_bisled_e", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR)) -DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_cmeq_v4sf, "spu_cmpabseq", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF)) -DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF)) DEF_BUILTIN (SPU_IDISABLE, CODE_FOR_spu_idisable, "spu_idisable", B_INSN, _A1(SPU_BTI_VOID)) DEF_BUILTIN (SPU_IENABLE, CODE_FOR_spu_ienable, "spu_ienable", B_INSN, _A1(SPU_BTI_VOID)) DEF_BUILTIN (SPU_MASK_FOR_LOAD, CODE_FOR_spu_lvsr, "spu_lvsr", B_INSN, _A2(SPU_BTI_V16QI, SPU_BTI_PTR)) +DEF_BUILTIN (SPU_TESTSV, CODE_FOR_dftsv, "spu_testsv", B_INSN, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_U7)) /* definitions to support overloaded generic builtin functions: */ @@ -339,6 +343,10 @@ DEF_BUILTIN (SPU_CMPEQ_9, CODE_FOR_ceq_v8hi, "spu_cmpeq_9", DEF_BUILTIN (SPU_CMPEQ_10, CODE_FOR_ceq_v8hi, "spu_cmpeq_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI)) DEF_BUILTIN (SPU_CMPEQ_11, CODE_FOR_ceq_v4si, "spu_cmpeq_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) DEF_BUILTIN (SPU_CMPEQ_12, CODE_FOR_ceq_v4si, "spu_cmpeq_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_CMPEQ_13, CODE_FOR_ceq_v2df, "spu_cmpeq_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_nothing, "spu_cmpabseq", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_CMPABSEQ_0, CODE_FOR_cmeq_v4sf, "spu_cmpabseq_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_CMPABSEQ_1, CODE_FOR_cmeq_v2df, "spu_cmpabseq_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF)) DEF_BUILTIN (SPU_CMPGT, CODE_FOR_nothing, "spu_cmpgt", B_OVERLOAD, _A1(SPU_BTI_VOID)) DEF_BUILTIN (SPU_CMPGT_0, CODE_FOR_clgt_v16qi, "spu_cmpgt_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) DEF_BUILTIN (SPU_CMPGT_1, CODE_FOR_cgt_v16qi, "spu_cmpgt_1", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) @@ -353,6 +361,10 @@ DEF_BUILTIN (SPU_CMPGT_9, CODE_FOR_clgt_v8hi, "spu_cmpgt_9", DEF_BUILTIN (SPU_CMPGT_10, CODE_FOR_cgt_v8hi, "spu_cmpgt_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI)) DEF_BUILTIN (SPU_CMPGT_11, CODE_FOR_cgt_v4si, "spu_cmpgt_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) DEF_BUILTIN (SPU_CMPGT_12, CODE_FOR_clgt_v4si, "spu_cmpgt_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_CMPGT_13, CODE_FOR_cgt_v2df, "spu_cmpgt_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_nothing, "spu_cmpabsgt", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_CMPABSGT_0, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_CMPABSGT_1, CODE_FOR_cmgt_v2df, "spu_cmpabsgt_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, 
SPU_BTI_V2DF, SPU_BTI_V2DF)) DEF_BUILTIN (SPU_HCMPEQ, CODE_FOR_nothing, "spu_hcmpeq", B_OVERLOAD, _A1(SPU_BTI_VOID)) DEF_BUILTIN (SPU_HCMPEQ_0, CODE_FOR_spu_heq, "spu_hcmpeq_0", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_INTSI, SPU_BTI_INTSI)) DEF_BUILTIN (SPU_HCMPEQ_1, CODE_FOR_spu_heq, "spu_hcmpeq_1", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_UINTSI, SPU_BTI_UINTSI)) diff --git a/gcc/config/spu/spu-c.c b/gcc/config/spu/spu-c.c index 43d5d16..56ddefb 100644 --- a/gcc/config/spu/spu-c.c +++ b/gcc/config/spu/spu-c.c @@ -138,6 +138,8 @@ spu_cpu_cpp_builtins (struct cpp_reader *pfile) builtin_define_std ("__SPU__"); cpp_assert (pfile, "cpu=spu"); cpp_assert (pfile, "machine=spu"); + if (spu_arch == PROCESSOR_CELLEDP) + builtin_define_std ("__SPU_EDP__"); builtin_define_std ("__vector=__attribute__((__spu_vector__))"); } diff --git a/gcc/config/spu/spu-protos.h b/gcc/config/spu/spu-protos.h index 4caaf1b..d069536 100644 --- a/gcc/config/spu/spu-protos.h +++ b/gcc/config/spu/spu-protos.h @@ -32,6 +32,7 @@ extern void spu_expand_insv (rtx * ops); extern int spu_expand_block_move (rtx * ops); extern void spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx * operands); +extern int spu_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); extern HOST_WIDE_INT const_double_to_hwint (rtx x); extern rtx hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v); extern void print_operand_address (FILE * file, register rtx addr); @@ -43,6 +44,8 @@ extern void spu_expand_prologue (void); extern void spu_expand_epilogue (unsigned char sibcall_p); extern rtx spu_return_addr (int count, rtx frame); extern rtx spu_const (enum machine_mode mode, HOST_WIDE_INT val); +extern rtx spu_const_from_ints (enum machine_mode mode, + int a, int b, int c, int d); extern struct rtx_def *spu_float_const (const char *string, enum machine_mode mode); extern int immediate_load_p (rtx op, enum machine_mode mode); diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index f963268..e283d87 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -95,6 +95,8 @@ static void emit_nop_for_insn (rtx insn); static bool insn_clobbers_hbr (rtx insn); static void spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance); +static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1, + enum machine_mode dmode); static rtx get_branch_target (rtx branch); static void insert_branch_hints (void); static void insert_nops (void); @@ -138,6 +140,11 @@ static int spu_builtin_vectorization_cost (bool); extern const char *reg_names[]; rtx spu_compare_op0, spu_compare_op1; +/* Which instruction set architecture to use. */ +int spu_arch; +/* Which cpu are we tuning for. */ +int spu_tune; + enum spu_immediate { SPU_NONE, SPU_IL, @@ -298,6 +305,28 @@ spu_override_options (void) if (spu_fixed_range_string) fix_range (spu_fixed_range_string); + + /* Determine processor architectural level. */ + if (spu_arch_string) + { + if (strcmp (&spu_arch_string[0], "cell") == 0) + spu_arch = PROCESSOR_CELL; + else if (strcmp (&spu_arch_string[0], "celledp") == 0) + spu_arch = PROCESSOR_CELLEDP; + else + error ("Unknown architecture '%s'", &spu_arch_string[0]); + } + + /* Determine processor to tune for. 
*/ + if (spu_tune_string) + { + if (strcmp (&spu_tune_string[0], "cell") == 0) + spu_tune = PROCESSOR_CELL; + else if (strcmp (&spu_tune_string[0], "celledp") == 0) + spu_tune = PROCESSOR_CELLEDP; + else + error ("Unknown architecture '%s'", &spu_tune_string[0]); + } } /* Handle an attribute requiring a FUNCTION_DECL; arguments as in @@ -646,16 +675,19 @@ spu_expand_block_move (rtx ops[]) enum spu_comp_code { SPU_EQ, SPU_GT, SPU_GTU }; - -int spu_comp_icode[8][3] = { - {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi}, - {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi}, - {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si}, - {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di}, - {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti}, - {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0}, - {0, 0, 0}, - {CODE_FOR_ceq_vec, 0, 0}, +int spu_comp_icode[12][3] = { + {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi}, + {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi}, + {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si}, + {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di}, + {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti}, + {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0}, + {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0}, + {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi}, + {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi}, + {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si}, + {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0}, + {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0}, }; /* Generate a compare for CODE. Return a brand-new rtx that represents @@ -786,13 +818,26 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[]) index = 6; break; case V16QImode: + index = 7; + comp_mode = op_mode; + break; case V8HImode: + index = 8; + comp_mode = op_mode; + break; case V4SImode: - case V2DImode: + index = 9; + comp_mode = op_mode; + break; case V4SFmode: + index = 10; + comp_mode = V4SImode; + break; case V2DFmode: - index = 7; + index = 11; + comp_mode = V2DImode; break; + case V2DImode: default: abort (); } @@ -800,16 +845,19 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[]) if (GET_MODE (spu_compare_op1) == DFmode) { rtx reg = gen_reg_rtx (DFmode); - if (!flag_unsafe_math_optimizations + if ((!flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL) || (scode != SPU_GT && scode != SPU_EQ)) abort (); - if (reverse_compare) - emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0)); - else - emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1)); - reverse_compare = 0; - spu_compare_op0 = reg; - spu_compare_op1 = CONST0_RTX (DFmode); + if (spu_arch == PROCESSOR_CELL) + { + if (reverse_compare) + emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0)); + else + emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1)); + reverse_compare = 0; + spu_compare_op0 = reg; + spu_compare_op1 = CONST0_RTX (DFmode); + } } if (is_set == 0 && spu_compare_op1 == const0_rtx @@ -1884,6 +1932,30 @@ spu_const (enum machine_mode mode, HOST_WIDE_INT val) size.) */ int spu_hint_dist = (8 * 4); +/* Create a MODE vector constant from 4 ints. 
*/ +rtx +spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d) +{ + unsigned char arr[16]; + arr[0] = (a >> 24) & 0xff; + arr[1] = (a >> 16) & 0xff; + arr[2] = (a >> 8) & 0xff; + arr[3] = (a >> 0) & 0xff; + arr[4] = (b >> 24) & 0xff; + arr[5] = (b >> 16) & 0xff; + arr[6] = (b >> 8) & 0xff; + arr[7] = (b >> 0) & 0xff; + arr[8] = (c >> 24) & 0xff; + arr[9] = (c >> 16) & 0xff; + arr[10] = (c >> 8) & 0xff; + arr[11] = (c >> 0) & 0xff; + arr[12] = (d >> 24) & 0xff; + arr[13] = (d >> 16) & 0xff; + arr[14] = (d >> 8) & 0xff; + arr[15] = (d >> 0) & 0xff; + return array_to_constant(mode, arr); +} + /* An array of these is used to propagate hints to predecessor blocks. */ struct spu_bb_info { @@ -4857,6 +4929,201 @@ spu_expand_vector_init (rtx target, rtx vals) } } +/* Return insn index for the vector compare instruction for given CODE, + and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */ + +static int +get_vec_cmp_insn (enum rtx_code code, + enum machine_mode dest_mode, + enum machine_mode op_mode) + +{ + switch (code) + { + case EQ: + if (dest_mode == V16QImode && op_mode == V16QImode) + return CODE_FOR_ceq_v16qi; + if (dest_mode == V8HImode && op_mode == V8HImode) + return CODE_FOR_ceq_v8hi; + if (dest_mode == V4SImode && op_mode == V4SImode) + return CODE_FOR_ceq_v4si; + if (dest_mode == V4SImode && op_mode == V4SFmode) + return CODE_FOR_ceq_v4sf; + if (dest_mode == V2DImode && op_mode == V2DFmode) + return CODE_FOR_ceq_v2df; + break; + case GT: + if (dest_mode == V16QImode && op_mode == V16QImode) + return CODE_FOR_cgt_v16qi; + if (dest_mode == V8HImode && op_mode == V8HImode) + return CODE_FOR_cgt_v8hi; + if (dest_mode == V4SImode && op_mode == V4SImode) + return CODE_FOR_cgt_v4si; + if (dest_mode == V4SImode && op_mode == V4SFmode) + return CODE_FOR_cgt_v4sf; + if (dest_mode == V2DImode && op_mode == V2DFmode) + return CODE_FOR_cgt_v2df; + break; + case GTU: + if (dest_mode == V16QImode && op_mode == V16QImode) + return CODE_FOR_clgt_v16qi; + if (dest_mode == V8HImode && op_mode == V8HImode) + return CODE_FOR_clgt_v8hi; + if (dest_mode == V4SImode && op_mode == V4SImode) + return CODE_FOR_clgt_v4si; + break; + default: + break; + } + return -1; +} + +/* Emit vector compare for operands OP0 and OP1 using code RCODE. + DMODE is expected destination mode. This is a recursive function. */ + +static rtx +spu_emit_vector_compare (enum rtx_code rcode, + rtx op0, rtx op1, + enum machine_mode dmode) +{ + int vec_cmp_insn; + rtx mask; + enum machine_mode dest_mode; + enum machine_mode op_mode = GET_MODE (op1); + + gcc_assert (GET_MODE (op0) == GET_MODE (op1)); + + /* Floating point vector compare instructions uses destination V4SImode. + Double floating point vector compare instructions uses destination V2DImode. + Move destination to appropriate mode later. */ + if (dmode == V4SFmode) + dest_mode = V4SImode; + else if (dmode == V2DFmode) + dest_mode = V2DImode; + else + dest_mode = dmode; + + mask = gen_reg_rtx (dest_mode); + vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode); + + if (vec_cmp_insn == -1) + { + bool swap_operands = false; + bool try_again = false; + switch (rcode) + { + case LT: + rcode = GT; + swap_operands = true; + try_again = true; + break; + case LTU: + rcode = GTU; + swap_operands = true; + try_again = true; + break; + case NE: + /* Treat A != B as ~(A==B). 
*/ + { + enum insn_code nor_code; + rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode); + nor_code = one_cmpl_optab->handlers[(int)dest_mode].insn_code; + gcc_assert (nor_code != CODE_FOR_nothing); + emit_insn (GEN_FCN (nor_code) (mask, eq_rtx)); + if (dmode != dest_mode) + { + rtx temp = gen_reg_rtx (dest_mode); + convert_move (temp, mask, 0); + return temp; + } + return mask; + } + break; + case GE: + case GEU: + case LE: + case LEU: + /* Try GT/GTU/LT/LTU OR EQ */ + { + rtx c_rtx, eq_rtx; + enum insn_code ior_code; + enum rtx_code new_code; + + switch (rcode) + { + case GE: new_code = GT; break; + case GEU: new_code = GTU; break; + case LE: new_code = LT; break; + case LEU: new_code = LTU; break; + default: + gcc_unreachable (); + } + + c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode); + eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode); + + ior_code = ior_optab->handlers[(int)dest_mode].insn_code; + gcc_assert (ior_code != CODE_FOR_nothing); + emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx)); + if (dmode != dest_mode) + { + rtx temp = gen_reg_rtx (dest_mode); + convert_move (temp, mask, 0); + return temp; + } + return mask; + } + break; + default: + gcc_unreachable (); + } + + /* You only get two chances. */ + if (try_again) + vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode); + + gcc_assert (vec_cmp_insn != -1); + + if (swap_operands) + { + rtx tmp; + tmp = op0; + op0 = op1; + op1 = tmp; + } + } + + emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1)); + if (dmode != dest_mode) + { + rtx temp = gen_reg_rtx (dest_mode); + convert_move (temp, mask, 0); + return temp; + } + return mask; +} + + +/* Emit vector conditional expression. + DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands. + CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */ + +int +spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2, + rtx cond, rtx cc_op0, rtx cc_op1) +{ + enum machine_mode dest_mode = GET_MODE (dest); + enum rtx_code rcode = GET_CODE (cond); + rtx mask; + + /* Get the vector mask for the given relational operations. */ + mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode); + + emit_insn(gen_selb (dest, op2, op1, mask)); + + return 1; +} + static rtx spu_force_reg (enum machine_mode mode, rtx op) { diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h index b8af6b2..8de32f8 100644 --- a/gcc/config/spu/spu.h +++ b/gcc/config/spu/spu.h @@ -32,6 +32,23 @@ extern int target_flags; extern const char *spu_fixed_range_string; +/* Which processor to generate code or schedule for. */ +enum processor_type +{ + PROCESSOR_CELL, + PROCESSOR_CELLEDP +}; + +extern GTY(()) int spu_arch; +extern GTY(()) int spu_tune; + +/* Support for a compile-time default architecture and tuning. The rules are: + --with-arch is ignored if -march is specified. + --with-tune is ignored if -mtune is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:-march=%(VALUE)}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" } + /* Default target_flags if no switches specified. */ #ifndef TARGET_DEFAULT #define TARGET_DEFAULT (MASK_ERROR_RELOC | MASK_SAFE_DMA | MASK_BRANCH_HINTS) @@ -605,7 +622,18 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \ #define NO_IMPLICIT_EXTERN_C 1 #define HANDLE_PRAGMA_PACK_PUSH_POP 1 - + +/* Canonicalize a comparison from one we don't have to one we do have. 
*/ +#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \ + do { \ + if (((CODE) == LE || (CODE) == LT || (CODE) == LEU || (CODE) == LTU)) \ + { \ + rtx tem = (OP0); \ + (OP0) = (OP1); \ + (OP1) = tem; \ + (CODE) = swap_condition (CODE); \ + } \ + } while (0) /* These are set by the cmp patterns and used while expanding conditional branches. */ diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md index b4c59cb..0b339c6 100644 --- a/gcc/config/spu/spu.md +++ b/gcc/config/spu/spu.md @@ -29,6 +29,7 @@ (define_attr "length" "" (const_int 4)) +(define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune"))) ;; Processor type -- this attribute must exactly match the processor_type ;; enumeration in spu.h. @@ -59,9 +60,17 @@ ;; for 6 cycles and the rest of the operation pipelines for ;; 7 cycles. The simplest way to model this is to simply ignore ;; the 6 cyle stall. -(define_insn_reservation "FPD" 7 (eq_attr "type" "fpd") +(define_insn_reservation "FPD" 7 + (and (eq_attr "tune" "cell") + (eq_attr "type" "fpd")) "pipe0 + pipe1, fp, nothing*5") +;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined +(define_insn_reservation "FPD_CELLEDP" 9 + (and (eq_attr "tune" "celledp") + (eq_attr "type" "fpd")) + "pipe0 + fp, nothing*8") + (define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop") "pipe1") @@ -144,6 +153,7 @@ (UNSPEC_WRCH 48) (UNSPEC_SPU_REALIGN_LOAD 49) (UNSPEC_SPU_MASK_FOR_LOAD 50) + (UNSPEC_DFTSV 51) ]) (include "predicates.md") @@ -192,6 +202,16 @@ (define_mode_macro VSF [SF V4SF]) (define_mode_macro VDF [DF V2DF]) +(define_mode_macro VCMP [V16QI + V8HI + V4SI + V4SF + V2DF]) + +(define_mode_macro VCMPU [V16QI + V8HI + V4SI]) + (define_mode_attr bh [(QI "b") (V16QI "b") (HI "h") (V8HI "h") (SI "") (V4SI "")]) @@ -200,9 +220,14 @@ (DF "d") (V2DF "d")]) (define_mode_attr d6 [(SF "6") (V4SF "6") (DF "d") (V2DF "d")]) -(define_mode_attr f2i [(SF "SI") (V4SF "V4SI") + +(define_mode_attr f2i [(SF "si") (V4SF "v4si") + (DF "di") (V2DF "v2di")]) +(define_mode_attr F2I [(SF "SI") (V4SF "V4SI") (DF "DI") (V2DF "V2DI")]) +(define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")]) + (define_mode_attr umask [(HI "f") (V8HI "f") (SI "g") (V4SI "g")]) (define_mode_attr nmask [(HI "F") (V8HI "F") @@ -990,8 +1015,8 @@ (neg:VSF (match_operand:VSF 1 "spu_reg_operand" ""))) (use (match_dup 2))])] "" - "operands[2] = gen_reg_rtx (mode); - emit_move_insn (operands[2], spu_const (mode, -0x80000000ull));") + "operands[2] = gen_reg_rtx (mode); + emit_move_insn (operands[2], spu_const (mode, -0x80000000ull));") (define_expand "neg2" [(parallel @@ -999,22 +1024,22 @@ (neg:VDF (match_operand:VDF 1 "spu_reg_operand" ""))) (use (match_dup 2))])] "" - "operands[2] = gen_reg_rtx (mode); - emit_move_insn (operands[2], spu_const (mode, -0x8000000000000000ull));") + "operands[2] = gen_reg_rtx (mode); + emit_move_insn (operands[2], spu_const (mode, -0x8000000000000000ull));") (define_insn_and_split "_neg2" [(set (match_operand:VSDF 0 "spu_reg_operand" "=r") (neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r"))) - (use (match_operand: 2 "spu_reg_operand" "r"))] + (use (match_operand: 2 "spu_reg_operand" "r"))] "" "#" "" - [(set (match_dup: 3) - (xor: (match_dup: 4) - (match_dup: 2)))] + [(set (match_dup: 3) + (xor: (match_dup: 4) + (match_dup: 2)))] { - operands[3] = spu_gen_subreg (mode, operands[0]); - operands[4] = spu_gen_subreg (mode, operands[1]); + operands[3] = spu_gen_subreg (mode, operands[0]); + operands[4] = spu_gen_subreg (mode, operands[1]); }) @@ -1026,8 +1051,8 @@ (abs:VSF (match_operand:VSF 1 
"spu_reg_operand" ""))) (use (match_dup 2))])] "" - "operands[2] = gen_reg_rtx (mode); - emit_move_insn (operands[2], spu_const (mode, 0x7fffffffull));") + "operands[2] = gen_reg_rtx (mode); + emit_move_insn (operands[2], spu_const (mode, 0x7fffffffull));") (define_expand "abs2" [(parallel @@ -1035,22 +1060,22 @@ (abs:VDF (match_operand:VDF 1 "spu_reg_operand" ""))) (use (match_dup 2))])] "" - "operands[2] = gen_reg_rtx (mode); - emit_move_insn (operands[2], spu_const (mode, 0x7fffffffffffffffull));") + "operands[2] = gen_reg_rtx (mode); + emit_move_insn (operands[2], spu_const (mode, 0x7fffffffffffffffull));") (define_insn_and_split "_abs2" [(set (match_operand:VSDF 0 "spu_reg_operand" "=r") (abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r"))) - (use (match_operand: 2 "spu_reg_operand" "r"))] + (use (match_operand: 2 "spu_reg_operand" "r"))] "" "#" "" - [(set (match_dup: 3) - (and: (match_dup: 4) - (match_dup: 2)))] + [(set (match_dup: 3) + (and: (match_dup: 4) + (match_dup: 2)))] { - operands[3] = spu_gen_subreg (mode, operands[0]); - operands[4] = spu_gen_subreg (mode, operands[1]); + operands[3] = spu_gen_subreg (mode, operands[0]); + operands[4] = spu_gen_subreg (mode, operands[1]); }) @@ -2493,27 +2518,173 @@ (set_attr "length" "12")]) (define_insn "ceq_" - [(set (match_operand: 0 "spu_reg_operand" "=r") - (eq: (match_operand:VSF 1 "spu_reg_operand" "r") + [(set (match_operand: 0 "spu_reg_operand" "=r") + (eq: (match_operand:VSF 1 "spu_reg_operand" "r") (match_operand:VSF 2 "spu_reg_operand" "r")))] "" "fceq\t%0,%1,%2") (define_insn "cmeq_" - [(set (match_operand: 0 "spu_reg_operand" "=r") - (eq: (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r")) + [(set (match_operand: 0 "spu_reg_operand" "=r") + (eq: (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r")) (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))] "" "fcmeq\t%0,%1,%2") -(define_insn "ceq_vec" +;; These implementations of ceq_df and cgt_df do not correctly handle +;; NAN or INF. We will also get incorrect results when the result +;; of the double subtract is too small. 
+(define_expand "ceq_df" [(set (match_operand:SI 0 "spu_reg_operand" "=r") - (eq:SI (match_operand 1 "spu_reg_operand" "r") - (match_operand 2 "spu_reg_operand" "r")))] - "VECTOR_MODE_P(GET_MODE(operands[1])) - && GET_MODE(operands[1]) == GET_MODE(operands[2])" - "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15" - [(set_attr "length" "12")]) + (eq:SI (match_operand:DF 1 "spu_reg_operand" "r") + (match_operand:DF 2 "const_zero_operand" "i")))] + "" +{ + if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL) + { + rtx s0_ti = gen_reg_rtx(TImode); + rtx s1_v4 = gen_reg_rtx(V4SImode); + rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti); + rtx to_ti = gen_reg_rtx(TImode); + rtx to_v4 = gen_reg_rtx(V4SImode); + rtx l_v4 = gen_reg_rtx(V4SImode); + emit_insn (gen_spu_convert (l_v4, operands[1])); + emit_insn (gen_movv4si(s1_v4, spu_const(V4SImode, -0x80000000ll))); + emit_insn (gen_ceq_v4si(s0_v4, l_v4, CONST0_RTX(V4SImode))); + emit_insn (gen_ceq_v4si(s1_v4, l_v4, s1_v4)); + emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4))); + emit_insn (gen_spu_convert (to_v4, to_ti)); + emit_insn (gen_iorv4si3(s1_v4, s0_v4, s1_v4)); + emit_insn (gen_andv4si3(to_v4, to_v4, s1_v4)); + emit_insn (gen_spu_convert (operands[0], to_v4)); + DONE; + } +}) + +(define_insn "ceq__celledp" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (eq: (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r")))] + "spu_arch == PROCESSOR_CELLEDP" + "dfceq\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_insn "cmeq__celledp" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (eq: (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r")) + (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))] + "spu_arch == PROCESSOR_CELLEDP" + "dfcmeq\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_expand "ceq_v2df" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r") + (match_operand:V2DF 2 "spu_reg_operand" "r")))] + "" +{ + if (spu_arch == PROCESSOR_CELL) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx rb = spu_gen_subreg (V4SImode, operands[2]); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx biteq = gen_reg_rtx (V4SImode); + rtx ahi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); + rtx iszero = gen_reg_rtx (V4SImode); + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hihi_promote = gen_reg_rtx (TImode); + + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213, + 0x08090A0B, 0x18191A1B); + emit_move_insn (hihi_promote, pat); + + emit_insn (gen_ceq_v4si (biteq, ra, rb)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si)); + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); + emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf)); + emit_insn (gen_iorv4si3 (a_nan, 
a_nan, temp2)); + emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs)); + emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode))); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si)); + emit_insn (gen_iorv4si3 (temp2, biteq, iszero)); + emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); + emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote)); + DONE; + } +}) + +(define_expand "cmeq_v2df" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r")) + (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))] + "" +{ + if(spu_arch == PROCESSOR_CELL) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx rb = spu_gen_subreg (V4SImode, operands[2]); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx biteq = gen_reg_rtx (V4SImode); + rtx ahi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); + + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hihi_promote = gen_reg_rtx (TImode); + + emit_move_insn (sign_mask, pat); + + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213, + 0x08090A0B, 0x18191A1B); + emit_move_insn (hihi_promote, pat); + + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si)); + emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); + emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); + emit_insn (gen_andc_v4si (temp2, biteq, a_nan)); + emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote)); + DONE; + } +}) ;; cgt @@ -2584,19 +2755,215 @@ selb\t%0,%5,%0,%3" (set_attr "length" "36")]) (define_insn "cgt_" - [(set (match_operand: 0 "spu_reg_operand" "=r") - (gt: (match_operand:VSF 1 "spu_reg_operand" "r") + [(set (match_operand: 0 "spu_reg_operand" "=r") + (gt: (match_operand:VSF 1 "spu_reg_operand" "r") (match_operand:VSF 2 "spu_reg_operand" "r")))] "" "fcgt\t%0,%1,%2") (define_insn "cmgt_" - [(set (match_operand: 0 "spu_reg_operand" "=r") - (gt: (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r")) + [(set (match_operand: 0 "spu_reg_operand" "=r") + (gt: (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r")) (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))] "" "fcmgt\t%0,%1,%2") +(define_expand "cgt_df" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (gt:SI (match_operand:DF 1 "spu_reg_operand" "r") + (match_operand:DF 2 "const_zero_operand" "i")))] + "" +{ + if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL) + { + rtx s0_ti = gen_reg_rtx(TImode); + rtx s1_v4 = gen_reg_rtx(V4SImode); + rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti); + rtx to_ti = gen_reg_rtx(TImode); + rtx to_v4 = gen_reg_rtx(V4SImode); + rtx l_v4 = gen_reg_rtx(V4SImode); + emit_insn 
(gen_spu_convert(l_v4, operands[1])); + emit_insn (gen_ceq_v4si(s0_v4, l_v4, const0_rtx)); + emit_insn (gen_cgt_v4si(s1_v4, l_v4, const0_rtx)); + emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4))); + emit_insn (gen_spu_convert(to_v4, to_ti)); + emit_insn (gen_andc_v4si(to_v4, s0_v4, to_v4)); + emit_insn (gen_iorv4si3(to_v4, to_v4, s1_v4)); + emit_insn (gen_spu_convert(operands[0], to_v4)); + DONE; + } +}) + +(define_insn "cgt__celledp" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (gt: (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r")))] + "spu_arch == PROCESSOR_CELLEDP" + "dfcgt\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_insn "cmgt__celledp" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (gt: (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r")) + (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))] + "spu_arch == PROCESSOR_CELLEDP" + "dfcmgt\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_expand "cgt_v2df" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r") + (match_operand:V2DF 2 "spu_reg_operand" "r")))] + "" +{ + if(spu_arch == PROCESSOR_CELL) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx rb = spu_gen_subreg (V4SImode, operands[2]); + rtx zero = gen_reg_rtx (V4SImode); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx hi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx b_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); + rtx asel = gen_reg_rtx (V4SImode); + rtx bsel = gen_reg_rtx (V4SImode); + rtx abor = gen_reg_rtx (V4SImode); + rtx bbor = gen_reg_rtx (V4SImode); + rtx gt_hi = gen_reg_rtx (V4SImode); + rtx gt_lo = gen_reg_rtx (V4SImode); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hi_promote = gen_reg_rtx (TImode); + rtx borrow_shuffle = gen_reg_rtx (TImode); + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, + 0x08090A0B, 0x08090A0B); + emit_move_insn (hi_promote, pat); + pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0, + 0x0C0D0E0F, 0xC0C0C0C0); + emit_move_insn (borrow_shuffle, pat); + + emit_insn (gen_andv4si3 (a_nan, ra, sign_mask)); + emit_insn (gen_ceq_v4si (hi_inf, a_nan, nan_mask)); + emit_insn (gen_clgt_v4si (a_nan, a_nan, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); + emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote)); + emit_insn (gen_andv4si3 (b_nan, rb, sign_mask)); + emit_insn (gen_ceq_v4si (hi_inf, b_nan, nan_mask)); + emit_insn (gen_clgt_v4si (b_nan, b_nan, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2)); + emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan)); + emit_move_insn (zero, CONST0_RTX (V4SImode)); + emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31))); + emit_insn (gen_shufb (asel, asel, 
asel, hi_promote)); + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_bg_v4si (abor, zero, a_abs)); + emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle)); + emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor)); + emit_insn (gen_selb (abor, a_abs, abor, asel)); + emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31))); + emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + emit_insn (gen_bg_v4si (bbor, zero, b_abs)); + emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle)); + emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor)); + emit_insn (gen_selb (bbor, b_abs, bbor, bsel)); + emit_insn (gen_cgt_v4si (gt_hi, abor, bbor)); + emit_insn (gen_clgt_v4si (gt_lo, abor, bbor)); + emit_insn (gen_ceq_v4si (temp2, abor, bbor)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si)); + emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2)); + + emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote)); + emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); + emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2)); + DONE; + } +}) + +(define_expand "cmgt_v2df" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r")) + (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))] + "" +{ + if(spu_arch == PROCESSOR_CELL) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx rb = spu_gen_subreg (V4SImode, operands[2]); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx hi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx b_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); + rtx gt_hi = gen_reg_rtx (V4SImode); + rtx gt_lo = gen_reg_rtx (V4SImode); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hi_promote = gen_reg_rtx (TImode); + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, + 0x08090A0B, 0x08090A0B); + emit_move_insn (hi_promote, pat); + + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask)); + emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); + emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask)); + emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2)); + emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan)); + + emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs)); + emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs)); + emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo), + GEN_INT (4 * 8))); + emit_insn 
(gen_andv4si3 (temp2, temp2, temp_v4si)); + emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2)); + emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote)); + emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); + emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2)); + DONE; + } +}) + ;; clgt @@ -2656,6 +3023,150 @@ selb\t%0,%4,%0,%3" (set_attr "length" "32")]) +;; dftsv +(define_insn "dftsv_celledp" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (unspec [(match_operand:V2DF 1 "spu_reg_operand" "r") + (match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))] + "spu_arch == PROCESSOR_CELLEDP" + "dftsv\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_expand "dftsv" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (unspec [(match_operand:V2DF 1 "spu_reg_operand" "r") + (match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))] + "" +{ + if(spu_arch == PROCESSOR_CELL) + { + rtx result = gen_reg_rtx (V4SImode); + emit_move_insn (result, CONST0_RTX (V4SImode)); + + if (INTVAL (operands[2])) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx abs = gen_reg_rtx (V4SImode); + rtx sign = gen_reg_rtx (V4SImode); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx hi_promote = gen_reg_rtx (TImode); + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, + 0x08090A0B, 0x08090A0B); + emit_move_insn (hi_promote, pat); + + emit_insn (gen_ashrv4si3 (sign, ra, spu_const (V4SImode, 31))); + emit_insn (gen_shufb (sign, sign, sign, hi_promote)); + emit_insn (gen_andv4si3 (abs, ra, sign_mask)); + + /* NaN or +inf or -inf */ + if (INTVAL (operands[2]) & 0x70) + { + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx isinf = gen_reg_rtx (V4SImode); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + emit_insn (gen_ceq_v4si (isinf, abs, nan_mask)); + + /* NaN */ + if (INTVAL (operands[2]) & 0x40) + { + rtx isnan = gen_reg_rtx (V4SImode); + emit_insn (gen_clgt_v4si (isnan, abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf)); + emit_insn (gen_iorv4si3 (isnan, isnan, temp2)); + emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote)); + emit_insn (gen_iorv4si3 (result, result, isnan)); + } + /* +inf or -inf */ + if (INTVAL (operands[2]) & 0x30) + { + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si)); + emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote)); + + /* +inf */ + if (INTVAL (operands[2]) & 0x20) + { + emit_insn (gen_andc_v4si (temp2, isinf, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + /* -inf */ + if (INTVAL (operands[2]) & 0x10) + { + emit_insn (gen_andv4si3 (temp2, isinf, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + } + } + + /* 0 or denorm */ + if (INTVAL (operands[2]) & 0xF) + { + rtx iszero = gen_reg_rtx (V4SImode); + emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode))); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si)); + + /* denorm */ + if (INTVAL (operands[2]) & 0x3) + { + rtx isdenorm = gen_reg_rtx (V4SImode); + rtx denorm_mask = 
gen_reg_rtx (V4SImode); + emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF)); + emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask)); + emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero)); + emit_insn (gen_shufb (isdenorm, isdenorm, + isdenorm, hi_promote)); + /* +denorm */ + if (INTVAL (operands[2]) & 0x2) + { + emit_insn (gen_andc_v4si (temp2, isdenorm, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + /* -denorm */ + if (INTVAL (operands[2]) & 0x1) + { + emit_insn (gen_andv4si3 (temp2, isdenorm, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + } + + /* 0 */ + if (INTVAL (operands[2]) & 0xC) + { + emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote)); + /* +0 */ + if (INTVAL (operands[2]) & 0x8) + { + emit_insn (gen_andc_v4si (temp2, iszero, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + /* -0 */ + if (INTVAL (operands[2]) & 0x4) + { + emit_insn (gen_andv4si3 (temp2, iszero, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + } + } + } + emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result)); + DONE; + } +}) + + ;; branches (define_insn "" @@ -2747,6 +3258,53 @@ selb\t%0,%4,%0,%3" DONE; }) +(define_expand "cmpdf" + [(set (cc0) + (compare (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")))] + "(flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL) + || spu_arch == PROCESSOR_CELLEDP " + "{ + spu_compare_op0 = operands[0]; + spu_compare_op1 = operands[1]; + DONE; +}") + +;; vector conditional compare patterns +(define_expand "vcond" + [(set (match_operand:VCMP 0 "spu_reg_operand" "=r") + (if_then_else:VCMP + (match_operator 3 "comparison_operator" + [(match_operand:VCMP 4 "spu_reg_operand" "r") + (match_operand:VCMP 5 "spu_reg_operand" "r")]) + (match_operand:VCMP 1 "spu_reg_operand" "r") + (match_operand:VCMP 2 "spu_reg_operand" "r")))] + "" + { + if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; + }) + +(define_expand "vcondu" + [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r") + (if_then_else:VCMPU + (match_operator 3 "comparison_operator" + [(match_operand:VCMPU 4 "spu_reg_operand" "r") + (match_operand:VCMPU 5 "spu_reg_operand" "r")]) + (match_operand:VCMPU 1 "spu_reg_operand" "r") + (match_operand:VCMPU 2 "spu_reg_operand" "r")))] + "" + { + if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; + }) + ;; branch on condition @@ -3376,7 +3934,7 @@ selb\t%0,%4,%0,%3" (define_expand "sminv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=r") - (smax:V4SF (match_operand:V4SF 1 "register_operand" "r") + (smin:V4SF (match_operand:V4SF 1 "register_operand" "r") (match_operand:V4SF 2 "register_operand" "r")))] "" " @@ -3388,6 +3946,34 @@ selb\t%0,%4,%0,%3" DONE; }") +(define_expand "smaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=r") + (smax:V2DF (match_operand:V2DF 1 "register_operand" "r") + (match_operand:V2DF 2 "register_operand" "r")))] + "" + " +{ + rtx mask = gen_reg_rtx (V2DImode); + emit_insn (gen_cgt_v2df (mask, operands[1], operands[2])); + emit_insn (gen_selb (operands[0], operands[2], operands[1], + spu_gen_subreg (V4SImode, mask))); + DONE; +}") + +(define_expand "sminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=r") + (smin:V2DF (match_operand:V2DF 1 "register_operand" "r") + (match_operand:V2DF 2 "register_operand" "r")))] + "" + " +{ + rtx mask 
= gen_reg_rtx (V2DImode); + emit_insn (gen_cgt_v2df (mask, operands[1], operands[2])); + emit_insn (gen_selb (operands[0], operands[1], operands[2], + spu_gen_subreg (V4SImode, mask))); + DONE; +}") + (define_expand "vec_widen_umult_hi_v8hi" [(set (match_operand:V4SI 0 "register_operand" "=r") (mult:V4SI diff --git a/gcc/config/spu/spu.opt b/gcc/config/spu/spu.opt index 6c3ab59..e8c11d1 100644 --- a/gcc/config/spu/spu.opt +++ b/gcc/config/spu/spu.opt @@ -55,3 +55,11 @@ Generate code for 32 bit addressing mfixed-range= Target RejectNegative Joined Var(spu_fixed_range_string) Specify range of registers to make fixed + +march= +Target RejectNegative Joined Var(spu_arch_string) +Generate code for given CPU + +mtune= +Target RejectNegative Joined Var(spu_tune_string) +Schedule code for given CPU diff --git a/gcc/config/spu/spu_internals.h b/gcc/config/spu/spu_internals.h index ecc8dc5..fb42c87 100644 --- a/gcc/config/spu/spu_internals.h +++ b/gcc/config/spu/spu_internals.h @@ -233,6 +233,15 @@ #define si_rchcnt(imm) __builtin_si_rchcnt(imm) #define si_wrch(imm,ra) __builtin_si_wrch(imm,ra) +/* celledp only instructions */ +#ifdef __SPU_EDP__ +#define si_dfceq(ra,rb) __builtin_si_dfceq(ra,rb) +#define si_dfcmeq(ra,rb) __builtin_si_dfcmeq(ra,rb) +#define si_dfcgt(ra,rb) __builtin_si_dfcgt(ra,rb) +#define si_dfcmgt(ra,rb) __builtin_si_dfcmgt(ra,rb) +#define si_dftsv(ra,imm) __builtin_si_dftsv(ra,imm) +#endif /* __SPU_EDP__ */ + #define si_from_char(scalar) __builtin_si_from_char(scalar) #define si_from_uchar(scalar) __builtin_si_from_uchar(scalar) #define si_from_short(scalar) __builtin_si_from_short(scalar) @@ -295,6 +304,7 @@ #define spu_cmpabsgt(ra,rb) __builtin_spu_cmpabsgt(ra,rb) #define spu_cmpeq(ra,rb) __builtin_spu_cmpeq(ra,rb) #define spu_cmpgt(ra,rb) __builtin_spu_cmpgt(ra,rb) +#define spu_testsv(ra,imm) __builtin_spu_testsv(ra,imm) #define spu_hcmpeq(ra,rb) __builtin_spu_hcmpeq(ra,rb) #define spu_hcmpgt(ra,rb) __builtin_spu_hcmpgt(ra,rb) #define spu_cntb(ra) __builtin_spu_cntb(ra) diff --git a/gcc/config/spu/spu_intrinsics.h b/gcc/config/spu/spu_intrinsics.h index ca91927..faaf8a6 100644 --- a/gcc/config/spu/spu_intrinsics.h +++ b/gcc/config/spu/spu_intrinsics.h @@ -70,6 +70,16 @@ #define MFC_WrListStallAck 26 #define MFC_RdAtomicStat 27 +/* Bit flag mnemonics for test special value. + */ +#define SPU_SV_NEG_DENORM 0x01 /* negative denormalized number */ +#define SPU_SV_POS_DENORM 0x02 /* positive denormalized number */ +#define SPU_SV_NEG_ZERO 0x04 /* negative zero */ +#define SPU_SV_POS_ZERO 0x08 /* positive zero */ +#define SPU_SV_NEG_INFINITY 0x10 /* negative infinity */ +#define SPU_SV_POS_INFINITY 0x20 /* positive infinity */ +#define SPU_SV_NAN 0x40 /* not a number */ + #include #endif /* _SPU_INTRINSICS_H */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 469fe0f..8a309c6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2007-07-13 Sa Liu + + * gcc.dg/vect/fast-math-vect-reduc-7.c: Switch on test + for V2DFmode vector conditional expression. + * gcc.target/spu/dfcmeq.c: New. Test combination of abs + and dfceq patterns. + * gcc.target/spu/dfcmgt.c: New. Test combination of abs + and dfcgt patterns. + * gcc.target/spu/intrinsics-2.c: New. Test intrinsics for + V2DFmode comparison and test special values. + * lib/target-supports.exp: Switch on test for V2DFmode + vector conditional expression. 
+ 2007-07-13 Richard Guenther PR tree-optimization/32721 diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c b/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c index c88a894..b25e114 100644 --- a/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c +++ b/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c @@ -50,6 +50,5 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_compare_double } } } */ -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_no_compare_double } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 028cb34..cdcb456 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1659,7 +1659,7 @@ proc check_effective_target_vect_double { } { return $et_vect_double_saved } -# Return 0 if the target supports hardware comparison of vectors of double, 0 otherwise. +# Return 1 if the target supports hardware comparison of vectors of double, 0 otherwise. # # This won't change for different subtargets so cache the result. @@ -1670,9 +1670,6 @@ proc check_effective_target_vect_no_compare_double { } { verbose "check_effective_target_vect_no_compare_double: using cached result" 2 } else { set et_vect_no_compare_double_saved 0 - if { [istarget spu-*-*] } { - set et_vect_no_compare_double_saved 1 - } } verbose "check_effective_target_vect_no_compare_double: returning $et_vect_no_compare_double_saved" 2 @@ -2025,6 +2022,7 @@ proc check_effective_target_vect_condition { } { if { [istarget powerpc*-*-*] || [istarget ia64-*-*] || [istarget i?86-*-*] + || [istarget spu-*-*] || [istarget x86_64-*-*] } { set et_vect_cond_saved 1 }
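
A minimal usage sketch of the interfaces this patch introduces (illustrative only, not part of the commit; it assumes an spu-gcc toolchain built with this change and the usual qword typedef from spu_internals.h; the function names below are invented for the example):

    /* Exercises the new spu_testsv intrinsic and the SPU_SV_* flag
       mnemonics.  With -march=celledp the compiler can emit the dftsv
       instruction directly; with plain -march=cell it falls back to the
       expanded V4SImode sequence added to spu.md.  */
    #include <spu_intrinsics.h>

    /* Per-element mask (all ones / all zeros) marking lanes of A that
       are NaN or +/- infinity.  The flag argument must be a constant.  */
    __vector unsigned long long
    special_lanes (__vector double a)
    {
      return spu_testsv (a, SPU_SV_NAN
                            | SPU_SV_POS_INFINITY
                            | SPU_SV_NEG_INFINITY);
    }

    #ifdef __SPU_EDP__
    /* __SPU_EDP__ is defined by spu-c.c when compiling for celledp,
       so the raw double-precision si_* forms are only usable there.  */
    qword
    raw_dfceq (qword ra, qword rb)
    {
      return si_dfceq (ra, rb);
    }
    #endif

The mask-returning form mirrors spu_cmpeq/spu_cmpgt on vector double, which after this patch map to the new ceq_v2df/cgt_v2df patterns on both CELL and CELLEDP targets.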