From 89e0a5e5163cbc250fdcc4878899bd63d375032a Mon Sep 17 00:00:00 2001 From: meissner Date: Tue, 9 Nov 2010 21:44:19 +0000 Subject: [PATCH] Improve powerpc floating point rounding git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@166510 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 40 +++ gcc/config/rs6000/rs6000-protos.h | 1 - gcc/config/rs6000/rs6000.c | 84 +---- gcc/config/rs6000/rs6000.md | 414 ++++++++++++++--------- gcc/testsuite/ChangeLog | 11 + gcc/testsuite/gcc.target/powerpc/ppc-fpconv-11.c | 2 +- gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c | 2 +- gcc/testsuite/gcc.target/powerpc/ppc-round.c | 37 ++ 8 files changed, 351 insertions(+), 240 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/ppc-round.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d8b2550..77b5e7e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,43 @@ +2010-11-09 Michael Meissner + + * config/rs6000/rs6000.md (floatsi2_lfiwax): Rewrite so + split occurs before reload, and we allocate memory at the time of + the split, not during expansion. Add attributes. + (floatsi2_lfiwax_mem): Ditto. + (floatunssi2_lfiwzx): Ditto. + (floatunssi2_lfiwzx_mem): Ditto. + (floatsidf2): Ditto. + (floatunssisf2): Ditto. + (floatunssidf2): Ditto. + (fix_truncsi2): Ditto. + (fix_truncsi2_stfiwx): Ditto. + (fix_truncsi2_internal): Ditto. + (fix_truncsi2): Ditto. + (fix_truncdi2): Ditto. + (fixuns_truncsi2_stfiwx): Ditto. + (floatsisf2): Ditto. + (floatdidf2_mem): Ditto. + (floatunsdidf2_mem): Ditto. + (floatunsdidf2): Ditto. + (floatdisf2_internal1): Ditto. + (floatdisf2_mem): Ditto. + (floatunsdisf2_mem): Ditto. + (floatsi2_lfiwax_mem2): Delete. + (floatunssi2_lfiwzx_mem2): Ditto. + (fix_truncsi2_mem): Ditto. + (fixuns_truncsi2_mem): Ditto. + (round322_fprs): New combiner insn to combine (double)(int) + type operations to reduce copying the values to multiple memory + slots. + (roundu322_fprs): Ditto. 
+ + * config/rs6000/rs6000.c (rs6000_address_for_fpconvert): Handle + PRE_INC, PRE_DEC, PRE_MODIFY. + (rs6000_expand_convert_si_to_sfdf): Delete, no longer used. + + * config/rs6000/rs6000-protos.h (rs6000_expand_convert_si_to_sfdf): + Delete prototype. + 2010-11-09 Jakub Jelinek PR target/43808 diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 937f95b..e5c6f0d 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -130,7 +130,6 @@ extern rtx rs6000_machopic_legitimize_pic_address (rtx, enum machine_mode, rtx); extern rtx rs6000_address_for_fpconvert (rtx); extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool); -extern void rs6000_expand_convert_si_to_sfdf (rtx, rtx, bool); extern int rs6000_loop_align (rtx); #endif /* RTX_CODE */ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index eadc122..e441c70 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -27230,82 +27230,30 @@ rs6000_address_for_fpconvert (rtx x) addr = XEXP (x, 0); if (! legitimate_indirect_address_p (addr, strict_p) && ! legitimate_indexed_address_p (addr, strict_p)) - x = replace_equiv_address (x, copy_addr_to_reg (addr)); - - return x; -} - -/* Expand 32-bit int -> floating point conversions. Return true if - successful. 
*/ - -void -rs6000_expand_convert_si_to_sfdf (rtx dest, rtx src, bool unsigned_p) -{ - enum machine_mode dmode = GET_MODE (dest); - rtx (*func_si) (rtx, rtx, rtx, rtx); - rtx (*func_si_mem) (rtx, rtx); - rtx (*func_di) (rtx, rtx); - rtx reg, stack; - - gcc_assert (GET_MODE (src) == SImode); - - if (dmode == SFmode) { - if (unsigned_p) + if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) { - gcc_assert (TARGET_FCFIDUS && TARGET_LFIWZX); - func_si = gen_floatunssisf2_lfiwzx; - func_si_mem = gen_floatunssisf2_lfiwzx_mem; - func_di = gen_floatunsdisf2; + rtx reg = XEXP (addr, 0); + HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x)); + rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size); + gcc_assert (REG_P (reg)); + emit_insn (gen_add3_insn (reg, reg, size_rtx)); + addr = reg; } - else + else if (GET_CODE (addr) == PRE_MODIFY) { - gcc_assert (TARGET_FCFIDS && TARGET_LFIWAX); - func_si = gen_floatsisf2_lfiwax; - func_si_mem = gen_floatsisf2_lfiwax_mem; - func_di = gen_floatdisf2; + rtx reg = XEXP (addr, 0); + rtx expr = XEXP (addr, 1); + gcc_assert (REG_P (reg)); + gcc_assert (GET_CODE (expr) == PLUS); + emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1))); + addr = reg; } - } - else if (dmode == DFmode) - { - if (unsigned_p) - { - gcc_assert (TARGET_FCFIDU && TARGET_LFIWZX); - func_si = gen_floatunssidf2_lfiwzx; - func_si_mem = gen_floatunssidf2_lfiwzx_mem; - func_di = gen_floatunsdidf2; - } - else - { - gcc_assert (TARGET_FCFID && TARGET_LFIWAX); - func_si = gen_floatsidf2_lfiwax; - func_si_mem = gen_floatsidf2_lfiwax_mem; - func_di = gen_floatdidf2; - } + x = replace_equiv_address (x, copy_addr_to_reg (addr)); } - else - gcc_unreachable (); - - if (MEM_P (src)) - { - src = rs6000_address_for_fpconvert (src); - emit_insn (func_si_mem (dest, src)); - } - else if (!TARGET_MFPGPR) - { - reg = gen_reg_rtx (DImode); - stack = rs6000_allocate_stack_temp (SImode, false, true); - emit_insn (func_si (dest, src, stack, reg)); - } - else - { 
- if (!REG_P (src)) - src = force_reg (SImode, src); - reg = convert_to_mode (DImode, src, unsigned_p); - emit_insn (func_di (dest, reg)); - } + return x; } #include "gt-rs6000.h" diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 37f5ad2..200a3b1 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6605,68 +6605,73 @@ "lfiwax %0,%y1" [(set_attr "type" "fpload")]) +; This split must be run before register allocation because it allocates the +; memory slot that is needed to move values to/from the FPR. We don't allocate +; it earlier to allow for the combiner to merge insns together where it might +; not be needed and also in case the insns are deleted as dead code. + (define_insn_and_split "floatsi2_lfiwax" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,") - (float:SFDF (match_operand:SI 1 "nonimmediate_operand" "Z,r"))) - (clobber (match_operand:SI 2 "indexed_or_indirect_operand" "=Z,Z")) - (clobber (match_operand:DI 3 "gpc_reg_operand" "=d,d"))] + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + (float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r"))) + (clobber (match_scratch:DI 2 "=d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX - && " + && && can_create_pseudo_p ()" "#" - "MEM_P (operands[1]) || reload_completed" + "" [(pc)] " { - if (MEM_P (operands[1])) - { - operands[1] = rs6000_address_for_fpconvert (operands[1]); - emit_insn (gen_lfiwax (operands[3], operands[1])); - } + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp; + + if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + tmp = convert_to_mode (DImode, src, false); else { - emit_move_insn (operands[2], operands[1]); - emit_insn (gen_lfiwax (operands[3], operands[2])); + tmp = operands[2]; + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + if (MEM_P (src)) + { + src = rs6000_address_for_fpconvert (src); + emit_insn (gen_lfiwax (tmp, src)); + } + else + { + rtx stack = 
rs6000_allocate_stack_temp (SImode, false, true); + emit_move_insn (stack, src); + emit_insn (gen_lfiwax (tmp, stack)); + } } - emit_insn (gen_floatdi2 (operands[0], operands[3])); + emit_insn (gen_floatdi2 (dest, tmp)); DONE; }" - [(set_attr "length" "8,12")]) + [(set_attr "length" "12") + (set_attr "type" "fpload")]) (define_insn_and_split "floatsi2_lfiwax_mem" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=") - (float:SFDF (match_operand:SI 1 "memory_operand" "Z"))) - (clobber (match_scratch:DI 2 "=d"))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX - && " - "#" - "&& reload_completed" - [(pc)] - " -{ - emit_insn (gen_lfiwax (operands[2], operands[1])); - emit_insn (gen_floatdi2 (operands[0], operands[2])); - DONE; -}" - [(set_attr "length" "8")]) - -(define_insn_and_split "floatsi2_lfiwax_mem2" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=") + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,") (float:SFDF (sign_extend:DI - (match_operand:SI 1 "memory_operand" "Z")))) - (clobber (match_scratch:DI 2 "=d"))] + (match_operand:SI 1 "memory_operand" "Z,Z")))) + (clobber (match_scratch:DI 2 "=0,d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX && " "#" - "&& reload_completed" + "" [(pc)] " { + operands[1] = rs6000_address_for_fpconvert (operands[1]); + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DImode); emit_insn (gen_lfiwax (operands[2], operands[1])); emit_insn (gen_floatdi2 (operands[0], operands[2])); DONE; }" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) (define_insn "lfiwzx" [(set (match_operand:DI 0 "gpc_reg_operand" "=d") @@ -6677,67 +6682,67 @@ [(set_attr "type" "fpload")]) (define_insn_and_split "floatunssi2_lfiwzx" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,") - (unsigned_float:SFDF (match_operand:SI 1 "gpc_reg_operand" "Z,r"))) - (clobber (match_operand:SI 2 "indexed_or_indirect_operand" "=Z,Z")) - (clobber 
(match_operand:DI 3 "gpc_reg_operand" "=d,d"))] + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + (unsigned_float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r"))) + (clobber (match_scratch:DI 2 "=d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX && " "#" - "MEM_P (operands[1]) || reload_completed" + "" [(pc)] " { - if (MEM_P (operands[1])) - { - operands[1] = rs6000_address_for_fpconvert (operands[1]); - emit_insn (gen_lfiwzx (operands[3], operands[1])); - } + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp; + + if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + tmp = convert_to_mode (DImode, src, true); else { - emit_move_insn (operands[2], operands[1]); - emit_insn (gen_lfiwzx (operands[3], operands[2])); + tmp = operands[2]; + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + if (MEM_P (src)) + { + src = rs6000_address_for_fpconvert (src); + emit_insn (gen_lfiwzx (tmp, src)); + } + else + { + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_move_insn (stack, src); + emit_insn (gen_lfiwzx (tmp, stack)); + } } - emit_insn (gen_floatdi2 (operands[0], operands[3])); + emit_insn (gen_floatdi2 (dest, tmp)); DONE; }" - [(set_attr "length" "8,12")]) + [(set_attr "length" "12") + (set_attr "type" "fpload")]) (define_insn_and_split "floatunssi2_lfiwzx_mem" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=") - (unsigned_float:SFDF (match_operand:SI 1 "memory_operand" "Z"))) - (clobber (match_scratch:DI 2 "=d"))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX - && " - "#" - "&& reload_completed" - [(pc)] - " -{ - emit_insn (gen_lfiwzx (operands[2], operands[1])); - emit_insn (gen_floatdi2 (operands[0], operands[2])); - DONE; -}" - [(set_attr "length" "8")]) - -(define_insn_and_split "floatunssi2_lfiwzx_mem2" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=") + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,") (unsigned_float:SFDF (zero_extend:DI - 
(match_operand:SI 1 "memory_operand" "Z")))) - (clobber (match_scratch:DI 2 "=d"))] + (match_operand:SI 1 "memory_operand" "Z,Z")))) + (clobber (match_scratch:DI 2 "=0,d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX && " "#" - "&& reload_completed" + "" [(pc)] " { + operands[1] = rs6000_address_for_fpconvert (operands[1]); + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DImode); emit_insn (gen_lfiwzx (operands[2], operands[1])); emit_insn (gen_floatdi2 (operands[0], operands[2])); DONE; }" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) ; For each of these conversions, there is a define_expand, a define_insn ; with a '#' template, and a define_split (with C code). The idea is @@ -6765,7 +6770,7 @@ } else if (TARGET_LFIWAX && TARGET_FCFID) { - rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], false); + emit_insn (gen_floatsidf2_lfiwax (operands[0], operands[1])); DONE; } else if (TARGET_FCFID) @@ -6819,7 +6824,8 @@ emit_insn (gen_subdf3 (operands[0], operands[5], operands[3])); DONE; }" - [(set_attr "length" "24")]) + [(set_attr "length" "24") + (set_attr "type" "fp")]) ;; If we don't have a direct conversion to single precision, don't enable this ;; conversion for 32-bit without fast math, because we don't have the insn to @@ -6842,7 +6848,7 @@ } else if (TARGET_LFIWZX && TARGET_FCFIDUS) { - rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], true); + emit_insn (gen_floatunssisf2_lfiwzx (operands[0], operands[1])); DONE; } else @@ -6876,8 +6882,8 @@ } else if (TARGET_LFIWZX && TARGET_FCFID) { - rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], true); - DONE; + emit_insn (gen_floatunssidf2_lfiwzx (operands[0], operands[1])); + DONE; } else if (TARGET_FCFID) { @@ -6927,7 +6933,8 @@ emit_insn (gen_subdf3 (operands[0], operands[5], operands[3])); DONE; }" - [(set_attr "length" "20")]) + [(set_attr "length" "20") + (set_attr "type" "fp")]) 
(define_expand "fix_truncsi2" [(set (match_operand:SI 0 "gpc_reg_operand" "") @@ -6941,12 +6948,7 @@ rtx tmp, stack; if (TARGET_STFIWX) - { - tmp = gen_reg_rtx (DImode); - stack = rs6000_allocate_stack_temp (SImode, false, true); - emit_insn (gen_fix_truncsi2_stfiwx (operands[0], operands[1], - tmp, stack)); - } + emit_insn (gen_fix_truncsi2_stfiwx (operands[0], operands[1])); else { tmp = gen_reg_rtx (DImode); @@ -6958,58 +6960,56 @@ } }") +; Like the convert to float patterns, this insn must be split before +; register allocation so that it can allocate the memory slot if it +; is needed. (define_insn_and_split "fix_truncsi2_stfiwx" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" ""))) - (clobber (match_operand:DI 2 "gpc_reg_operand" "=d")) - (clobber (match_operand:SI 3 "indexed_or_indirect_operand" "=Z"))] + [(set (match_operand:SI 0 "general_operand" "=rm") + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))) + (clobber (match_scratch:DI 2 "=d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (mode != SFmode || TARGET_SINGLE_FLOAT) - && TARGET_STFIWX" + && TARGET_STFIWX && can_create_pseudo_p ()" "#" - "&& reload_completed" + "" [(pc)] - " { - emit_insn (gen_fctiwz_ (operands[2], operands[1])); - if (TARGET_MFPGPR && TARGET_POWERPC64 && REG_P (operands[0]) - && INT_REGNO_P (REGNO (operands[0]))) + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwz_ (tmp, src)); + if (MEM_P (dest)) { - rtx reg = gen_lowpart (DImode, operands[0]); - emit_move_insn (reg, operands[2]); + dest = rs6000_address_for_fpconvert (dest); + emit_insn (gen_stfiwx (dest, tmp)); + DONE; + } + else if (TARGET_MFPGPR && TARGET_POWERPC64) + { + dest = gen_lowpart (DImode, dest); + emit_move_insn (dest, tmp); + DONE; } else { - emit_insn (gen_stfiwx (operands[3], operands[2])); - emit_move_insn (operands[0], 
operands[3]); + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_insn (gen_stfiwx (stack, tmp)); + emit_move_insn (dest, stack); + DONE; } - DONE; -}" - [(set_attr "length" "12")]) - -(define_insn_and_split "*fix_truncsi2_mem" - [(set (match_operand:SI 0 "memory_operand" "=Z") - (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" ""))) - (clobber (match_scratch:DI 2 "=d"))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT - && (mode != SFmode || TARGET_SINGLE_FLOAT) - && TARGET_STFIWX" - "#" - "&& reload_completed" - [(pc)] - " -{ - emit_insn (gen_fctiwz_ (operands[2], operands[1])); - emit_insn (gen_stfiwx (operands[0], operands[2])); - DONE; -}" - [(set_attr "length" "8")]) +} + [(set_attr "length" "12") + (set_attr "type" "fp")]) (define_insn_and_split "fix_truncsi2_internal" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" ""))) - (clobber (match_operand:DI 2 "gpc_reg_operand" "=d")) - (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o"))] + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,?r") + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d,"))) + (clobber (match_operand:DI 2 "gpc_reg_operand" "=1,d")) + (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o,o"))] "(TARGET_POWER2 || TARGET_POWERPC) && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" "#" @@ -7026,7 +7026,8 @@ emit_move_insn (operands[0], lowword); DONE; }" - [(set_attr "length" "16")]) + [(set_attr "length" "16") + (set_attr "type" "fp")]) (define_expand "fix_truncdi2" [(set (match_operand:DI 0 "gpc_reg_operand" "") @@ -7053,59 +7054,51 @@ { if (!) 
{ - rtx tmp = gen_reg_rtx (DImode); - rtx stack = rs6000_allocate_stack_temp (SImode, false, true); - emit_insn (gen_fixuns_truncsi2_stfiwx (operands[0], operands[1], - tmp, stack)); + emit_insn (gen_fixuns_truncsi2_stfiwx (operands[0], operands[1])); DONE; } }") (define_insn_and_split "fixuns_truncsi2_stfiwx" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (unsigned_fix:SI - (match_operand:SFDF 1 "gpc_reg_operand" ""))) - (clobber (match_operand:DI 2 "gpc_reg_operand" "=d")) - (clobber (match_operand:SI 3 "indexed_or_indirect_operand" "=Z"))] + [(set (match_operand:SI 0 "general_operand" "=rm") + (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))) + (clobber (match_scratch:DI 2 "=d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && && TARGET_FCTIWUZ - && TARGET_STFIWX" + && TARGET_STFIWX && can_create_pseudo_p ()" "#" - "&& reload_completed" + "" [(pc)] - " { - emit_insn (gen_fctiwuz_ (operands[2], operands[1])); - if (TARGET_MFPGPR && TARGET_POWERPC64 && REG_P (operands[0]) - && INT_REGNO_P (REGNO (operands[0]))) + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwuz_ (tmp, src)); + if (MEM_P (dest)) { - rtx reg = gen_lowpart (DImode, operands[0]); - emit_move_insn (reg, operands[2]); + dest = rs6000_address_for_fpconvert (dest); + emit_insn (gen_stfiwx (dest, tmp)); + DONE; + } + else if (TARGET_MFPGPR && TARGET_POWERPC64) + { + dest = gen_lowpart (DImode, dest); + emit_move_insn (dest, tmp); + DONE; } else { - emit_insn (gen_stfiwx (operands[3], operands[2])); - emit_move_insn (operands[0], operands[3]); + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_insn (gen_stfiwx (stack, tmp)); + emit_move_insn (dest, stack); + DONE; } - DONE; -}" - [(set_attr "length" "12")]) - -(define_insn_and_split "*fixuns_truncsi2_mem" - [(set (match_operand:SI 0 "memory_operand" "=Z") - (unsigned_fix:SI (match_operand:SFDF 1 
"gpc_reg_operand" ""))) - (clobber (match_scratch:DI 2 "=d"))] - "TARGET_HARD_FLOAT && TARGET_FPRS && && TARGET_FCTIWUZ - && TARGET_STFIWX" - "#" - "&& reload_completed" - [(pc)] - " -{ - emit_insn (gen_fctiwuz_ (operands[2], operands[1])); - emit_insn (gen_stfiwx (operands[0], operands[2])); - DONE; -}" - [(set_attr "length" "8")]) +} + [(set_attr "length" "12") + (set_attr "type" "fp")]) (define_expand "fixuns_truncdi2" [(set (match_operand:DI 0 "register_operand" "") @@ -7155,6 +7148,76 @@ "friz %0,%1" [(set_attr "type" "fp")]) +;; Since FCTIWZ doesn't sign extend the upper bits, we have to do a store and a +;; load to properly sign extend the value, but we avoid doing a store, load +;; into a GPR to sign extend, a store from the GPR and a load back into the FPR +;; if we have 32-bit memory ops +(define_insn_and_split "*round322_fprs" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + (float:SFDF + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d")))) + (clobber (match_scratch:DI 2 "=d")) + (clobber (match_scratch:DI 3 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && && TARGET_LFIWAX && TARGET_STFIWX && TARGET_FCFID + && can_create_pseudo_p ()" + "#" + "" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp1 = operands[2]; + rtx tmp2 = operands[3]; + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + + if (GET_CODE (tmp1) == SCRATCH) + tmp1 = gen_reg_rtx (DImode); + if (GET_CODE (tmp2) == SCRATCH) + tmp2 = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwz_ (tmp1, src)); + emit_insn (gen_stfiwx (stack, tmp1)); + emit_insn (gen_lfiwax (tmp2, stack)); + emit_insn (gen_floatdi2 (dest, tmp2)); + DONE; +} + [(set_attr "type" "fpload") + (set_attr "length" "16")]) + +(define_insn_and_split "*roundu322_fprs" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + (unsigned_float:SFDF + (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d")))) + (clobber (match_scratch:DI 2 "=d")) + (clobber 
(match_scratch:DI 3 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && TARGET_LFIWZX && TARGET_STFIWX && TARGET_FCFIDU + && can_create_pseudo_p ()" + "#" + "" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp1 = operands[2]; + rtx tmp2 = operands[3]; + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + + if (GET_CODE (tmp1) == SCRATCH) + tmp1 = gen_reg_rtx (DImode); + if (GET_CODE (tmp2) == SCRATCH) + tmp2 = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwuz_ (tmp1, src)); + emit_insn (gen_stfiwx (stack, tmp1)); + emit_insn (gen_lfiwzx (tmp2, stack)); + emit_insn (gen_floatdi2 (dest, tmp2)); + DONE; +} + [(set_attr "type" "fpload") + (set_attr "length" "16")]) + ;; No VSX equivalent to fctid (define_insn "lrintdi2" [(set (match_operand:DI 0 "gpc_reg_operand" "=d") @@ -7251,7 +7314,14 @@ } else if (TARGET_FCFIDS && TARGET_LFIWAX) { - rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], false); + emit_insn (gen_floatsisf2_lfiwax (operands[0], operands[1])); + DONE; + } + else if (TARGET_FCFID && TARGET_LFIWAX) + { + rtx dfreg = gen_reg_rtx (DFmode); + emit_insn (gen_floatsidf2_lfiwax (dfreg, operands[1])); + emit_insn (gen_truncdfsf2 (operands[0], dfreg)); DONE; } else @@ -7294,7 +7364,8 @@ [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:DF (match_dup 2)))] "" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) (define_expand "floatunsdidf2" [(set (match_operand:DF 0 "gpc_reg_operand" "") @@ -7321,7 +7392,8 @@ [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (unsigned_float:DF (match_dup 2)))] "" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) (define_expand "floatdisf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") @@ -7373,6 +7445,7 @@ ;; This is not IEEE compliant if rounding mode is "round to nearest". ;; If the DI->DF conversion is inexact, then it's possible to suffer ;; from double rounding. 
+;; Instead of creating a new cpu type for two FP operations, just use fp (define_insn_and_split "floatdisf2_internal1" [(set (match_operand:SF 0 "gpc_reg_operand" "=f") (float:SF (match_operand:DI 1 "gpc_reg_operand" "d"))) @@ -7384,7 +7457,9 @@ (float:DF (match_dup 1))) (set (match_dup 0) (float_truncate:SF (match_dup 2)))] - "") + "" + [(set_attr "length" "8") + (set_attr "type" "fp")]) ;; Twiddles bits to avoid double rounding. ;; Bits that might be truncated when converting to DFmode are replaced @@ -7448,7 +7523,8 @@ emit_insn (gen_floatunsdisf2_fcfidus (operands[0], operands[2])); DONE; }" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) ;; Define the DImode operations that can be done in a small number ;; of instructions. The & constraints are to prevent the register diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3e451d0..19f58b1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2010-11-09 Michael Meissner + + * gcc.target/powerpc/ppc-fpconv-11.c: Use -mcpu=power5+, not + power5 to enable generation of FRIZ. + + * gcc.target/powerpc/ppc-round.c: New file, test (double)(int) + optimization. + + * gcc.target/powerpc/ppc-fpconv-2.c: Update # times lfiwax is + expected. 
+ 2010-11-09 Jakub Jelinek PR target/43808 diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-11.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-11.c index f6d28cd..2eebbb4 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-11.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-11.c @@ -1,6 +1,6 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ -/* { dg-options "-O2 -mcpu=power5 -ffast-math" } */ +/* { dg-options "-O2 -mcpu=power5+ -ffast-math" } */ /* { dg-final { scan-assembler-not "xsrdpiz" } } */ /* { dg-final { scan-assembler "friz" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c index f90a35b..e0a8342 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c @@ -2,7 +2,7 @@ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ /* { dg-options "-O2 -mcpu=power6 -ffast-math" } */ -/* { dg-final { scan-assembler-times "lfiwax" 1 } } */ +/* { dg-final { scan-assembler-times "lfiwax" 2 } } */ /* { dg-final { scan-assembler-not "lfiwzx" } } */ /* { dg-final { scan-assembler-times "fcfid " 10 } } */ /* { dg-final { scan-assembler-not "fcfids" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-round.c b/gcc/testsuite/gcc.target/powerpc/ppc-round.c new file mode 100644 index 0000000..20262aa --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/ppc-round.c @@ -0,0 +1,37 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ +/* { dg-final { scan-assembler-times "stfiwx" 4 } } */ +/* { dg-final { scan-assembler-times "lfiwax" 2 } } */ +/* { dg-final { scan-assembler-times "lfiwzx" 2 } } */ +/* { dg-final { scan-assembler-times "fctiwz" 2 
} } */ +/* { dg-final { scan-assembler-times "xscvsxddp" 2 } } */ +/* { dg-final { scan-assembler-times "fcfids" 2 } } */ +/* { dg-final { scan-assembler-not "lwz" } } */ +/* { dg-final { scan-assembler-not "stw" } } */ + +/* Make sure we don't have loads/stores to the GPR unit. */ +double +round_double_int (double a) +{ + return (double)(int)a; +} + +float +round_float_int (float a) +{ + return (float)(int)a; +} + +double +round_double_uint (double a) +{ + return (double)(unsigned int)a; +} + +float +round_float_uint (float a) +{ + return (float)(unsigned int)a; +} -- 2.7.4