From: Daniel Berlin Date: Wed, 5 Dec 2001 18:00:54 +0000 (+0000) Subject: rs6000.h (enum rs6000_builtins): Add more altivec builtins. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2212663f7002e0459021fec3ef226c07b10c2c58;p=platform%2Fupstream%2Fgcc.git rs6000.h (enum rs6000_builtins): Add more altivec builtins. 2001-12-05 Daniel Berlin * config/rs6000/rs6000.h (enum rs6000_builtins): Add more altivec builtins. * config/rs6000/rs6000.md: Modeling of 7450 altivec changed to better reflect reality (change from Apple's tree). Add more of altivec instructions. Add ftruncv4sf2 pattern. Remove more unspecs (vector merge instructions, etc). * config/rs6000/rs6000.c (bdesc_3arg): New, for 3 argument altivec builtins. (bdesc_1arg): New, for 1 argument altivec builtins. (altivec_expand_builtin): Handle unary and ternary ops. (altivec_init_builtins): Ditto. From-SVN: r47681 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ef9ab80..1de656a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2001-12-05 Daniel Berlin + + * config/rs6000/rs6000.h (enum rs6000_builtins): Add more altivec + builtins. + + * config/rs6000/rs6000.md: Modeling of 7450 altivec changed to + better reflect reality (change from Apple's tree). + Add more of altivec instructions. + Add ftruncv4sf2 pattern. + Remove more unspecs (vector merge instructions, etc). + + * config/rs6000/rs6000.c (bdesc_3arg): New, for 3 argument altivec + builtins. + (bdesc_1arg): New, for 1 argument altivec builtins. + (altivec_expand_builtin): Handle unary and ternary ops. + (altivec_init_builtins): Ditto. + Wed Dec 5 09:33:39 2001 Richard Kenner * config/alpha/vms.h (ADA_LONG_TYPE_SIZE): New macro. 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 58b30bd..bbc0294 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -161,8 +161,9 @@ static void rs6000_init_builtins PARAMS ((void)); static void altivec_init_builtins PARAMS ((void)); static rtx rs6000_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int)); static rtx altivec_expand_builtin PARAMS ((tree, rtx)); +static rtx altivec_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx)); static rtx altivec_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx)); - +static rtx altivec_expand_ternop_builtin PARAMS ((enum insn_code, tree, rtx)); static void rs6000_parse_abi_options PARAMS ((void)); static int first_altivec_reg_to_save PARAMS ((void)); static unsigned int compute_vrsave_mask PARAMS ((void)); @@ -3001,8 +3002,27 @@ struct builtin_description const char *const name; const enum rs6000_builtins code; }; +/* Simple ternary operations: VECd = foo (VECa, VECb, VECc) */ +static const struct builtin_description bdesc_3arg[] = + { + { MASK_ALTIVEC, CODE_FOR_altivec_vmaddfp, "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmhaddshs, "__builtin_altivec_vmhaddshs", ALTIVEC_BUILTIN_VMHADDSHS }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmhraddshs, "__builtin_altivec_vmhraddshs", ALTIVEC_BUILTIN_VMHRADDSHS }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmladduhm, "__builtin_altivec_vmladduhm", ALTIVEC_BUILTIN_VMLADDUHM}, + { MASK_ALTIVEC, CODE_FOR_altivec_vmsumubm, "__builtin_altivec_vmsumubm", ALTIVEC_BUILTIN_VMSUMUBM }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmsummbm, "__builtin_altivec_vmsummbm", ALTIVEC_BUILTIN_VMSUMMBM }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmsumuhm, "__builtin_altivec_vmsumuhm", ALTIVEC_BUILTIN_VMSUMUHM }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmsumshm, "__builtin_altivec_vmsumshm", ALTIVEC_BUILTIN_VMSUMSHM }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmsumuhs, "__builtin_altivec_vmsumuhs", ALTIVEC_BUILTIN_VMSUMUHS }, + 
{ MASK_ALTIVEC, CODE_FOR_altivec_vmsumshs, "__builtin_altivec_vmsumshs", ALTIVEC_BUILTIN_VMSUMSHS }, + { MASK_ALTIVEC, CODE_FOR_altivec_vnmsubfp, "__builtin_altivec_vnmsubfp", ALTIVEC_BUILTIN_VNMSUBFP }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_4sf, "__builtin_altivec_vperm_4sf", ALTIVEC_BUILTIN_VPERM_4SF }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_4si, "__builtin_altivec_vperm_4si", ALTIVEC_BUILTIN_VPERM_4SI }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_8hi, "__builtin_altivec_vperm_8hi", ALTIVEC_BUILTIN_VPERM_8HI }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_16qi, "__builtin_altivec_vperm_16qi", ALTIVEC_BUILTIN_VPERM_16QI }, + }; -/* Simple binary operatiors: VECc = foo (VECa, VECb). */ +/* Simple binary operations: VECc = foo (VECa, VECb). */ static const struct builtin_description bdesc_2arg[] = { { MASK_ALTIVEC, CODE_FOR_addv16qi3, "__builtin_altivec_vaddubm", ALTIVEC_BUILTIN_VADDUBM }, @@ -3086,6 +3106,9 @@ static const struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vslw, "__builtin_altivec_vslw", ALTIVEC_BUILTIN_VSLW }, { MASK_ALTIVEC, CODE_FOR_altivec_vsl, "__builtin_altivec_vsl", ALTIVEC_BUILTIN_VSL }, { MASK_ALTIVEC, CODE_FOR_altivec_vslo, "__builtin_altivec_vslo", ALTIVEC_BUILTIN_VSLO }, + { MASK_ALTIVEC, CODE_FOR_altivec_vspltb, "__builtin_altivec_vspltb", ALTIVEC_BUILTIN_VSPLTB }, + { MASK_ALTIVEC, CODE_FOR_altivec_vsplth, "__builtin_altivec_vsplth", ALTIVEC_BUILTIN_VSPLTH }, + { MASK_ALTIVEC, CODE_FOR_altivec_vspltw, "__builtin_altivec_vspltw", ALTIVEC_BUILTIN_VSPLTW }, { MASK_ALTIVEC, CODE_FOR_altivec_vsrb, "__builtin_altivec_vsrb", ALTIVEC_BUILTIN_VSRB }, { MASK_ALTIVEC, CODE_FOR_altivec_vsrh, "__builtin_altivec_vsrh", ALTIVEC_BUILTIN_VSRH }, { MASK_ALTIVEC, CODE_FOR_altivec_vsrw, "__builtin_altivec_vsrw", ALTIVEC_BUILTIN_VSRW }, @@ -3112,7 +3135,41 @@ static const struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vsumsws, "__builtin_altivec_vsumsws", ALTIVEC_BUILTIN_VSUMSWS }, { MASK_ALTIVEC, 
CODE_FOR_xorv4si3, "__builtin_altivec_vxor", ALTIVEC_BUILTIN_VXOR }, }; +/* Simple unary operations: VECb = foo (unsigned literal). */ +static const struct builtin_description bdesc_1arg[] = +{ + { MASK_ALTIVEC, CODE_FOR_altivec_vspltisb, "__builtin_altivec_vspltisb", ALTIVEC_BUILTIN_VSPLTISB }, + { MASK_ALTIVEC, CODE_FOR_altivec_vspltish, "__builtin_altivec_vspltish", ALTIVEC_BUILTIN_VSPLTISH }, + { MASK_ALTIVEC, CODE_FOR_altivec_vspltisw, "__builtin_altivec_vspltisw", ALTIVEC_BUILTIN_VSPLTISW }, +}; + +static rtx +altivec_expand_unop_builtin (icode, arglist, target) + enum insn_code icode; + tree arglist; + rtx target; +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! 
pat) + return 0; + emit_insn (pat); + + return target; +} static rtx altivec_expand_binop_builtin (icode, arglist, target) enum insn_code icode; @@ -3145,7 +3202,43 @@ altivec_expand_binop_builtin (icode, arglist, target) return target; } +static rtx +altivec_expand_ternop_builtin (icode, arglist, target) + enum insn_code icode; + tree arglist; + rtx target; +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + rtx op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + enum machine_mode mode2 = insn_data[icode].operand[3].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + + return target; +} static rtx altivec_expand_builtin (exp, target) tree exp; @@ -3321,6 +3414,11 @@ altivec_expand_builtin (exp, target) emit_insn (pat); return NULL_RTX; } + /* Handle simple unary operations. */ + d = (struct builtin_description *) bdesc_1arg; + for (i = 0; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++) + if (d->code == fcode) + return altivec_expand_unop_builtin (d->icode, arglist, target); /* Handle simple binary operations. 
*/ d = (struct builtin_description *) bdesc_2arg; @@ -3328,6 +3426,12 @@ altivec_expand_builtin (exp, target) if (d->code == fcode) return altivec_expand_binop_builtin (d->icode, arglist, target); + /* Handle simple ternary operations. */ + d = (struct builtin_description *) bdesc_3arg; + for (i = 0; i < sizeof (bdesc_3arg) / sizeof *d; i++, d++) + if (d->code == fcode) + return altivec_expand_ternop_builtin (d->icode, arglist, target); + abort (); return NULL_RTX; } @@ -3371,6 +3475,49 @@ altivec_init_builtins (void) tree pshort_type_node = build_pointer_type (short_integer_type_node); tree pchar_type_node = build_pointer_type (char_type_node); tree pfloat_type_node = build_pointer_type (float_type_node); + tree v4sf_ftype_v4sf_v4sf_v16qi + = build_function_type (V4SF_type_node, + tree_cons (NULL_TREE, V4SF_type_node, + tree_cons (NULL_TREE, V4SF_type_node, + tree_cons (NULL_TREE, + V16QI_type_node, + endlink)))); + tree v4si_ftype_v4si_v4si_v16qi + = build_function_type (V4SI_type_node, + tree_cons (NULL_TREE, V4SI_type_node, + tree_cons (NULL_TREE, V4SI_type_node, + tree_cons (NULL_TREE, + V16QI_type_node, + endlink)))); + tree v8hi_ftype_v8hi_v8hi_v16qi + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, + V16QI_type_node, + endlink)))); + tree v16qi_ftype_v16qi_v16qi_v16qi + = build_function_type (V16QI_type_node, + tree_cons (NULL_TREE, V16QI_type_node, + tree_cons (NULL_TREE, V16QI_type_node, + tree_cons (NULL_TREE, + V16QI_type_node, + endlink)))); + + /* V4SI foo (char) */ + tree v4si_ftype_char + = build_function_type (V4SI_type_node, + tree_cons (NULL_TREE, char_type_node, endlink)); + + /* V8HI foo (char) */ + tree v8hi_ftype_char + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, char_type_node, endlink)); + + /* V16QI foo (char) */ + tree v16qi_ftype_char + = build_function_type (V16QI_type_node, + tree_cons (NULL_TREE, char_type_node, endlink)); 
/* V4SI foo (int *). */ tree v4si_ftype_pint @@ -3419,6 +3566,22 @@ altivec_init_builtins (void) tree_cons (NULL_TREE, V4SI_type_node, tree_cons (NULL_TREE, V4SI_type_node, endlink))); + /* These are really for the unsigned 5 bit literals */ + tree v4si_ftype_v4si_char + = build_function_type (V4SI_type_node, + tree_cons (NULL_TREE, V4SI_type_node, + tree_cons (NULL_TREE, char_type_node, + endlink))); + tree v8hi_ftype_v8hi_char + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, char_type_node, + endlink))); + tree v16qi_ftype_v16qi_char + = build_function_type (V16QI_type_node, + tree_cons (NULL_TREE, V16QI_type_node, + tree_cons (NULL_TREE, char_type_node, + endlink))); tree v4sf_ftype_v4sf_v4sf = build_function_type (V4SF_type_node, @@ -3426,18 +3589,47 @@ altivec_init_builtins (void) tree_cons (NULL_TREE, V4SF_type_node, endlink))); + tree v4sf_ftype_v4sf_v4sf_v4sf + = build_function_type (V4SF_type_node, + tree_cons (NULL_TREE, V4SF_type_node, + tree_cons (NULL_TREE, V4SF_type_node, + tree_cons (NULL_TREE, + V4SF_type_node, + endlink)))); + tree v8hi_ftype_v8hi_v8hi = build_function_type (V8HI_type_node, tree_cons (NULL_TREE, V8HI_type_node, tree_cons (NULL_TREE, V8HI_type_node, endlink))); - + tree v8hi_ftype_v8hi_v8hi_v8hi + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, + V8HI_type_node, + endlink)))); + tree v4si_ftype_v8hi_v8hi_v4si + = build_function_type (V4SI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, + V4SI_type_node, + endlink)))); + tree v4si_ftype_v16qi_v16qi_v4si + = build_function_type (V4SI_type_node, + tree_cons (NULL_TREE, V16QI_type_node, + tree_cons (NULL_TREE, V16QI_type_node, + tree_cons (NULL_TREE, + V4SI_type_node, + endlink)))); + tree v16qi_ftype_v16qi_v16qi = build_function_type (V16QI_type_node, tree_cons 
(NULL_TREE, V16QI_type_node, tree_cons (NULL_TREE, V16QI_type_node, endlink))); - + tree v4si_ftype_v4sf_v4sf = build_function_type (V4SI_type_node, tree_cons (NULL_TREE, V4SF_type_node, @@ -3513,6 +3705,77 @@ altivec_init_builtins (void) def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_16qi", v16qi_ftype_pchar, ALTIVEC_BUILTIN_LD_INTERNAL_16qi); def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_16qi", void_ftype_pchar_v16qi, ALTIVEC_BUILTIN_ST_INTERNAL_16qi); + /* Add the simple ternary operators. */ + d = (struct builtin_description *) bdesc_3arg; + for (i = 0; i < sizeof (bdesc_3arg) / sizeof *d; i++, d++) + { + + enum machine_mode mode0, mode1, mode2, mode3; + tree type; + + if (d->name == 0) + continue; + + mode0 = insn_data[d->icode].operand[0].mode; + mode1 = insn_data[d->icode].operand[1].mode; + mode2 = insn_data[d->icode].operand[2].mode; + mode3 = insn_data[d->icode].operand[3].mode; + + /* When all four are of the same mode. */ + if (mode0 == mode1 && mode1 == mode2 && mode2 == mode3) + { + switch (mode0) + { + case V4SFmode: + type = v4sf_ftype_v4sf_v4sf_v4sf; + break; + case V8HImode: + type = v8hi_ftype_v8hi_v8hi_v8hi; + break; + case V16QImode: + type = v16qi_ftype_v16qi_v16qi_v16qi; + break; + default: + abort(); + } + } + else if (mode0 == mode1 && mode1 == mode2 && mode3 == V16QImode) + { + switch (mode0) + { + case V4SImode: + type = v4si_ftype_v4si_v4si_v16qi; + break; + case V4SFmode: + type = v4sf_ftype_v4sf_v4sf_v16qi; + break; + case V8HImode: + type = v8hi_ftype_v8hi_v8hi_v16qi; + break; + case V16QImode: + type = v16qi_ftype_v16qi_v16qi_v16qi; + break; + default: + abort(); + } + } + else if (mode0 == V4SImode && mode1 == V16QImode && mode2 == V16QImode + && mode3 == V4SImode) + { + type = v4si_ftype_v16qi_v16qi_v4si; + } + else if (mode0 == V4SImode && mode1 == V8HImode && mode2 == V8HImode + && mode3 == V4SImode) + { + type = v4si_ftype_v8hi_v8hi_v4si; + } + + else + abort (); + + def_builtin (d->mask, d->name, type, 
d->code); + } + /* Add the simple binary operators. */ d = (struct builtin_description *) bdesc_2arg; for (i = 0; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++) @@ -3578,6 +3841,18 @@ altivec_init_builtins (void) /* vint, vshort, vint. */ else if (mode0 == V4SImode && mode1 == V8HImode && mode2 == V4SImode) type = v4si_ftype_v8hi_v4si; + + /* vint, vint, 5 bit literal. */ + else if (mode0 == V4SImode && mode1 == V4SImode && mode2 == QImode) + type = v4si_ftype_v4si_char; + + /* vshort, vshort, 5 bit literal. */ + else if (mode0 == V8HImode && mode1 == V8HImode && mode2 == QImode) + type = v8hi_ftype_v8hi_char; + + /* vchar, vchar, 5 bit literal. */ + else if (mode0 == V16QImode && mode1 == V16QImode && mode2 == QImode) + type = v16qi_ftype_v16qi_char; /* fixme: aldyh */ /* int, x, x. */ @@ -3607,6 +3882,30 @@ altivec_init_builtins (void) def_builtin (d->mask, d->name, type, d->code); } + /* Add the simple unary operators. */ + d = (struct builtin_description *) bdesc_1arg; + for (i = 0; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++) + { + enum machine_mode mode0, mode1; + tree type; + + if (d->name == 0) + continue; + + mode0 = insn_data[d->icode].operand[0].mode; + mode1 = insn_data[d->icode].operand[1].mode; + + if (mode0 == V4SImode && mode1 == QImode) + type = v4si_ftype_char; + else if (mode0 == V8HImode && mode1 == QImode) + type = v8hi_ftype_char; + else if (mode0 == V16QImode && mode1 == QImode) + type = v16qi_ftype_char; + else + abort (); + + def_builtin (d->mask, d->name, type, d->code); + } } diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 2e100b2..50137ee 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -2925,6 +2925,7 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VCMPGTUW, ALTIVEC_BUILTIN_VCMPGTSW, ALTIVEC_BUILTIN_VCMPGTFP, + ALTIVEC_BUILTIN_VMADDFP, ALTIVEC_BUILTIN_VMAXUB, ALTIVEC_BUILTIN_VMAXSB, ALTIVEC_BUILTIN_VMAXUH, @@ -2932,12 +2933,21 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VMAXUW, 
ALTIVEC_BUILTIN_VMAXSW, ALTIVEC_BUILTIN_VMAXFP, + ALTIVEC_BUILTIN_VMHADDSHS, + ALTIVEC_BUILTIN_VMHRADDSHS, + ALTIVEC_BUILTIN_VMLADDUHM, ALTIVEC_BUILTIN_VMRGHB, ALTIVEC_BUILTIN_VMRGHH, ALTIVEC_BUILTIN_VMRGHW, ALTIVEC_BUILTIN_VMRGLB, ALTIVEC_BUILTIN_VMRGLH, ALTIVEC_BUILTIN_VMRGLW, + ALTIVEC_BUILTIN_VMSUMUBM, + ALTIVEC_BUILTIN_VMSUMMBM, + ALTIVEC_BUILTIN_VMSUMUHM, + ALTIVEC_BUILTIN_VMSUMSHM, + ALTIVEC_BUILTIN_VMSUMUHS, + ALTIVEC_BUILTIN_VMSUMSHS, ALTIVEC_BUILTIN_VMINUB, ALTIVEC_BUILTIN_VMINSB, ALTIVEC_BUILTIN_VMINUH, @@ -2953,8 +2963,13 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VMULOSB, ALTIVEC_BUILTIN_VMULOUH, ALTIVEC_BUILTIN_VMULOSH, + ALTIVEC_BUILTIN_VNMSUBFP, ALTIVEC_BUILTIN_VNOR, ALTIVEC_BUILTIN_VOR, + ALTIVEC_BUILTIN_VPERM_4SI, + ALTIVEC_BUILTIN_VPERM_4SF, + ALTIVEC_BUILTIN_VPERM_8HI, + ALTIVEC_BUILTIN_VPERM_16QI, ALTIVEC_BUILTIN_VPKUHUM, ALTIVEC_BUILTIN_VPKUWUM, ALTIVEC_BUILTIN_VPKPX, @@ -2974,6 +2989,12 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VSLW, ALTIVEC_BUILTIN_VSL, ALTIVEC_BUILTIN_VSLO, + ALTIVEC_BUILTIN_VSPLTB, + ALTIVEC_BUILTIN_VSPLTH, + ALTIVEC_BUILTIN_VSPLTW, + ALTIVEC_BUILTIN_VSPLTISB, + ALTIVEC_BUILTIN_VSPLTISH, + ALTIVEC_BUILTIN_VSPLTISW, ALTIVEC_BUILTIN_VSRB, ALTIVEC_BUILTIN_VSRH, ALTIVEC_BUILTIN_VSRW, diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index a8a5a95..ed42ae1 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -297,7 +297,10 @@ ; PPC7450 has 3 integer units (for most integer insns) and one mul/div ; unit, which also does CR-logical insns and move to/from SPR. - +; It also has 4 vector units, one for each type of vector instruction. +; However, we can only dispatch 2 instructions per cycle. +; We model this as saying that dispatching two of the same type of instruction +; in a row incurs a single cycle delay. 
(define_function_unit "iu3" 3 0 (and (eq_attr "type" "integer") (eq_attr "cpu" "ppc7450")) @@ -317,26 +320,46 @@ (and (eq_attr "type" "cr_logical") (eq_attr "cpu" "ppc7450")) 1 1) -(define_function_unit "viu1" 1 0 +(define_function_unit "vec_alu2" 2 0 (and (eq_attr "type" "vecsimple") (eq_attr "cpu" "ppc7450")) - 1 1) -(define_function_unit "viu2" 1 0 + 1 2 [(eq_attr "type" "vecsimple")]) +(define_function_unit "vec_alu2" 2 0 + (and (eq_attr "type" "vecsimple") + (eq_attr "cpu" "ppc7450")) + 1 1 [(eq_attr "type" "!vecsimple")]) +(define_function_unit "vec_alu2" 2 0 (and (eq_attr "type" "veccomplex") (eq_attr "cpu" "ppc7450")) - 4 1) -(define_function_unit "vfpu" 1 0 + 4 2 [(eq_attr "type" "veccomplex")]) +(define_function_unit "vec_alu2" 2 0 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "ppc7450")) + 4 1 [(eq_attr "type" "!veccomplex")]) +(define_function_unit "vec_alu2" 2 0 (and (eq_attr "type" "veccmp") (eq_attr "cpu" "ppc7450")) - 2 1) -(define_function_unit "vfpu" 1 0 + 2 2 [(eq_attr "type" "veccmp")]) +(define_function_unit "vec_alu2" 2 0 + (and (eq_attr "type" "veccmp") + (eq_attr "cpu" "ppc7450")) + 2 1 [(eq_attr "type" "!veccmp")]) +(define_function_unit "vec_alu2" 2 0 (and (eq_attr "type" "vecfloat") (eq_attr "cpu" "ppc7450")) - 4 1) -(define_function_unit "vpu" 1 0 + 4 2 [(eq_attr "type" "vecfloat")]) +(define_function_unit "vec_alu2" 2 0 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "ppc7450")) + 4 1 [(eq_attr "type" "!vecfloat")]) +(define_function_unit "vec_alu2" 2 0 (and (eq_attr "type" "vecperm") (eq_attr "cpu" "ppc7450")) - 2 1) + 2 2 [(eq_attr "type" "vecperm")]) +(define_function_unit "vec_alu2" 2 0 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "ppc7450")) + 2 1 [(eq_attr "type" "!vecperm")]) ; PPC750 has two integer units: a primary one which can perform all ; operations and a secondary one which is fed in lock step with the first @@ -9143,6 +9166,59 @@ && ! MEM_VOLATILE_P (operands[0]) && ! 
MEM_VOLATILE_P (operands[2]) && addrs_ok_for_quad_peep (XEXP (operands[0], 0), XEXP (operands[2], 0))" "stfq%U0%X0 %1,%0") +;; APPLE LOCAL peephole2 to eliminate unneeded computation of FP const +;; address in register. If lo part of address is reused (i.e. reg0 +;; is not dead), make the change in the fp load anyway (for scheduling) +;; but we switch the instructions in case op0==op1. +;; (define_peephole2 +;; [(set (match_operand:SI 0 "gpc_reg_operand" "") +;; (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "") +;; (match_operand:SI 2 "" ""))) +;; (set (match_operand:DF 3 "gpc_reg_operand" "") +;; (mem:DF (match_dup 0)))] +;; "TARGET_HARD_FLOAT +;; && peep2_reg_dead_p(2, operands[0]) +;; && FP_REGNO_P (REGNO (operands[3]))" +;; [(set (match_dup 3) (mem:DF (lo_sum:SI (match_dup 1) (match_dup 2))))] +;; "") + +;; (define_peephole2 +;; [(set (match_operand:SI 0 "gpc_reg_operand" "") +;; (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "") +;; (match_operand:SI 2 "" ""))) +;; (set (match_operand:DF 3 "gpc_reg_operand" "") +;; (mem:DF (match_dup 0)))] +;; "TARGET_HARD_FLOAT +;; && !peep2_reg_dead_p(2, operands[0]) +;; && FP_REGNO_P (REGNO (operands[3]))" +;; [(set (match_dup 3) (mem:DF (lo_sum:SI (match_dup 1) (match_dup 2)))) +;; (set (match_dup 0) (lo_sum:SI (match_dup 1) (match_dup 2)))] +;; "") + +;; (define_peephole2 +;; [(set (match_operand:SI 0 "gpc_reg_operand" "") +;; (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "") +;; (match_operand:SI 2 "" ""))) +;; (set (match_operand:SF 3 "gpc_reg_operand" "") +;; (mem:SF (match_dup 0)))] +;; "TARGET_HARD_FLOAT +;; && peep2_reg_dead_p(2, operands[0]) +;; && FP_REGNO_P (REGNO (operands[3]))" +;; [(set (match_dup 3) (mem:SF (lo_sum:SI (match_dup 1) (match_dup 2))))] +;; "") + +;; (define_peephole2 +;; [(set (match_operand:SI 0 "gpc_reg_operand" "") +;; (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "") +;; (match_operand:SI 2 "" ""))) +;; (set (match_operand:SF 3 "gpc_reg_operand" "") +;; (mem:SF (match_dup 0)))] 
+;; "TARGET_HARD_FLOAT +;; && !peep2_reg_dead_p(2, operands[0]) +;; && FP_REGNO_P (REGNO (operands[3]))" +;; [(set (match_dup 3) (mem:SF (lo_sum:SI (match_dup 1) (match_dup 2)))) +;; (set (match_dup 0) (lo_sum:SI (match_dup 1) (match_dup 2)))] +;; "") ;; Next come insns related to the calling sequence. ;; @@ -13826,8 +13902,8 @@ (define_insn "altivec_vandc" [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] 43))] + (and:V4SI (match_operand:V4SI 1 "register_operand" "v") + (not:V4SI (match_operand:V4SI 2 "register_operand" "v"))))] "TARGET_ALTIVEC" "vandc %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -13984,6 +14060,94 @@ "vcmpgtfp %0,%1,%2" [(set_attr "type" "veccmp")]) +;; Fused multiply add +(define_insn "altivec_vmaddfp" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")) + (match_operand:V4SF 3 "register_operand" "v")))] + "TARGET_ALTIVEC" + "vmaddfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + +;; The unspec here is a vec splat of 0. We do multiply as a fused +;; multiply-add with an add of a 0 vector. 
+ +(define_expand "mulv4sf3" + [(set (match_dup 3) (unspec:V4SF [(const_int 0)] 142)) + (set (match_operand:V4SF 0 "register_operand" "=v") + (plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")) + (match_dup 3)))] + "TARGET_ALTIVEC && TARGET_FUSED_MADD" + " +{ operands[3] = gen_reg_rtx (V4SFmode); }") + +;; Fused multiply subtract +(define_insn "altivec_vnmsubfp" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (minus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")) + (match_operand:V4SF 3 "register_operand" "v")))] + "TARGET_ALTIVEC" + "vnmsubfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + + +(define_insn "altivec_vmsumubm" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] 65))] + "TARGET_ALTIVEC" + "vmsumubm %0, %1, %2, %3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmsummbm" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] 66))] + "TARGET_ALTIVEC" + "vmsummbm %0, %1, %2, %3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmsumuhm" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] 67))] + "TARGET_ALTIVEC" + "vmsumuhm %0, %1, %2, %3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmsumshm" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand"
"v")] 68))] + "TARGET_ALTIVEC" + "vmsumshm %0, %1, %2, %3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmsumuhs" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] 69))] + "TARGET_ALTIVEC" + "vmsumuhs %0, %1, %2, %3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmsumshs" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] 70))] + "TARGET_ALTIVEC" + "vmsumshs %0, %1, %2, %3" + [(set_attr "type" "veccomplex")]) + (define_insn "umaxv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=v") (umax:V16QI (match_operand:V16QI 1 "register_operand" "v") @@ -14040,50 +14204,137 @@ "vmaxfp %0,%1,%2" [(set_attr "type" "veccmp")]) +(define_insn "altivec_vmhaddshs" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V8HI 3 "register_operand" "v")] 71))] + "TARGET_ALTIVEC" + "vmhaddshs %0, %1, %2, %3" + [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmhraddshs" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V8HI 3 "register_operand" "v")] 72))] + "TARGET_ALTIVEC" + "vmhraddshs %0, %1, %2, %3" + [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmladduhm" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V8HI 3 "register_operand" "v")] 73))] + "TARGET_ALTIVEC" + "vmladduhm %0, %1, %2, %3" + [(set_attr "type" 
"veccomplex")]) + (define_insn "altivec_vmrghb" [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") - (match_operand:V16QI 2 "register_operand" "v")] 70))] + (vec_merge:V16QI (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "v") + (parallel [(const_int 8) + (const_int 9) + (const_int 10) + (const_int 11) + (const_int 12) + (const_int 13) + (const_int 14) + (const_int 15) + (const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7)])) + (match_operand:V16QI 2 "register_operand" "v") + (const_int 255)))] "TARGET_ALTIVEC" "vmrghb %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrghh" [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] 71))] + (vec_merge:V8HI (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "v") + (parallel [(const_int 4) + (const_int 5) + (const_int 6) + (const_int 7) + (const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)])) + (match_operand:V8HI 2 "register_operand" "v") + (const_int 15)))] "TARGET_ALTIVEC" "vmrghh %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrghw" [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] 72))] + (vec_merge:V4SI (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "v") + (parallel [(const_int 2) + (const_int 3) + (const_int 0) + (const_int 1)])) + (match_operand:V4SI 2 "register_operand" "v") + (const_int 12)))] "TARGET_ALTIVEC" "vmrghw %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrglb" [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") - (match_operand:V16QI 2 "register_operand" "v")] 73))] + (vec_merge:V16QI 
(vec_select:V16QI (match_operand:V16QI 2 "register_operand" "v") + (parallel [(const_int 8) + (const_int 9) + (const_int 10) + (const_int 11) + (const_int 12) + (const_int 13) + (const_int 14) + (const_int 15) + (const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7)])) + (match_operand:V16QI 1 "register_operand" "v") + (const_int 255)))] "TARGET_ALTIVEC" "vmrglb %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrglh" [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] 74))] + (vec_merge:V8HI (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "v") + (parallel [(const_int 4) + (const_int 5) + (const_int 6) + (const_int 7) + (const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)])) + (match_operand:V8HI 1 "register_operand" "v") + (const_int 15)))] "TARGET_ALTIVEC" "vmrglh %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrglw" [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] 75))] + (vec_merge:V4SI (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "v") + (parallel [(const_int 2) + (const_int 3) + (const_int 0) + (const_int 1)])) + (match_operand:V4SI 1 "register_operand" "v") + (const_int 12)))] "TARGET_ALTIVEC" "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) @@ -14575,3 +14826,98 @@ "TARGET_ALTIVEC" "vxor %0,%1,%2" [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vspltb" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:QI 2 "immediate_operand" "i")] 136))] + "TARGET_ALTIVEC" + "vspltb %0,%1,%2" + [(set_attr "type" "vecperm")]) +(define_insn "altivec_vsplth" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI 
[(match_operand:V8HI 1 "register_operand" "v") + (match_operand:QI 2 "immediate_operand" "i")] 137))] + "TARGET_ALTIVEC" + "vsplth %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vspltw" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:QI 2 "immediate_operand" "i")] 138))] + "TARGET_ALTIVEC" + "vspltw %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vspltisb" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:QI 1 "immediate_operand" "i")] 139))] + "TARGET_ALTIVEC" + "vspltisb %0, %1" + [(set_attr "type" "vecsimple")]) + + +(define_insn "altivec_vspltish" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:QI 1 "immediate_operand" "i")] 140))] + "TARGET_ALTIVEC" + "vspltish %0, %1" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vspltisw" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:QI 1 "immediate_operand" "i")] 141))] + "TARGET_ALTIVEC" + "vspltisw %0, %1" + [(set_attr "type" "vecsimple")]) + +(define_insn "" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:QI 1 "immediate_operand" "i")] 142))] + "TARGET_ALTIVEC" + "vspltisw %0, %1" + [(set_attr "type" "vecsimple")]) + +(define_insn "ftruncv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (fix:V4SF (match_operand:V4SF 1 "register_operand" "v")))] + "TARGET_ALTIVEC" + "vrfiz %0, %1" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vperm_4si" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] 144))] + "TARGET_ALTIVEC" + "vperm %0,%1,%2,%3" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vperm_4sf" + [(set (match_operand:V4SF 0 
"register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] 145))] + "TARGET_ALTIVEC" + "vperm %0,%1,%2,%3" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vperm_8hi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] 146))] + "TARGET_ALTIVEC" + "vperm %0,%1,%2,%3" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vperm_16qi" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] 147))] + "TARGET_ALTIVEC" + "vperm %0,%1,%2,%3" + [(set_attr "type" "vecperm")])