From b17d542637d69e2e0ea09434a7892706a335dc28 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 30 May 2011 22:55:30 +0200 Subject: [PATCH] i386.md (*movxf_internal): Penalize FYx*r->o alternative to prevent partial memory stalls. * config/i386/i386.md (*movxf_internal): Penalize FYx*r->o alternative to prevent partial memory stalls. Do not move CONST_DOUBLEs directly to memory for !TARGET_MEMORY_MISMATCH_STALL. (*movdf_internal_rex64): Do not penalize F->r alternative. (*movdf_internal): Penalize FYd*r->o alternative to prevent partial memory stalls. Generate SSE and x87 CONST_DOUBLE immediates only when optimizing function for size. Do not move CONST_DOUBLEs directly to memory for !TARGET_MEMORY_MISMATCH_STALL. (FP move splitters): Merge {TF,XF,DF}mode move splitters. Do not handle SUBREGs. Do not check for MEM_P operands in the insn condition, check for ANY_FP_REGNO_P instead. * config/i386/constraints.md (Yd): Enable GENERAL_REGS for TARGET_64BIT and for TARGET_INTEGER_DFMODE_MOVES when optimizing function for speed. * config/i386/i386.c (ix86_option_override_internal): Do not set TARGET_INTEGER_DFMODE_MOVES here. From-SVN: r174453 --- gcc/ChangeLog | 24 +++++++- gcc/config/i386/constraints.md | 4 +- gcc/config/i386/i386.c | 7 --- gcc/config/i386/i386.md | 130 +++++++++++++---------------------------- 4 files changed, 66 insertions(+), 99 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 45212c0..20a3938 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2011-05-30 Uros Bizjak + + * config/i386/i386.md (*movxf_internal): Penalize FYx*r->o alternative + to prevent partial memory stalls. Do not move CONST_DOUBLEs directly + to memory for !TARGET_MEMORY_MISMATCH_STALL. + (*movdf_internal_rex64): Do not penalize F->r alternative. + (*movdf_internal): Penalize FYd*r->o alternative to prevent partial + memory stalls. Generate SSE and x87 CONST_DOUBLE immediates only + when optimizing function for size. Do not move CONST_DOUBLEs + directly to memory for !TARGET_MEMORY_MISMATCH_STALL. + (FP move splitters): Merge {TF,XF,DF}mode move splitters. Do not + handle SUBREGs. Do not check for MEM_P operands in the insn condition, + check for ANY_FP_REGNO_P instead. + * config/i386/constraints.md (Yd): Enable GENERAL_REGS for + TARGET_64BIT and for TARGET_INTEGER_DFMODE_MOVES when optimizing + function for speed. + * config/i386/i386.c (ix86_option_override_internal): Do not + set TARGET_INTEGER_DFMODE_MOVES here. + 2011-05-30 H.J. Lu PR target/49168 @@ -18,7 +37,7 @@ (gimple_expand_builtin_pow): Reorder args for build_and_insert_call; use build_and_insert_binop; add more optimizations for fractional exponents. - + 2011-05-30 Nathan Froyd PR bootstrap/49190 @@ -86,8 +105,7 @@ PR tree-optimization/49199 * tree-vect-loop.c (vect_is_slp_reduction): Check that the - non-reduction operands are either defined in the loop or - by induction. + non-reduction operands are either defined in the loop or by induction. 2011-05-29 Xinliang David Li diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 458364b..3197ba2 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -108,7 +108,9 @@ "@internal Any MMX register, when inter-unit moves are enabled.") (define_register_constraint "Yd" - "TARGET_INTEGER_DFMODE_MOVES ? GENERAL_REGS : NO_REGS" + "(TARGET_64BIT + || (TARGET_INTEGER_DFMODE_MOVES && optimize_function_for_speed_p (cfun))) + ? GENERAL_REGS : NO_REGS" "@internal Any integer register when integer DFmode moves are enabled.") (define_register_constraint "Yx" diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ae81d62..85d145a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3947,13 +3947,6 @@ ix86_option_override_internal (bool main_args_p) if (!TARGET_80387) target_flags |= MASK_NO_FANCY_MATH_387; - /* On 32bit targets, avoid moving DFmode values in - integer registers when optimizing for size. */ - if (TARGET_64BIT) - target_flags |= TARGET_INTEGER_DFMODE_MOVES; - else if (optimize_size) - target_flags &= ~TARGET_INTEGER_DFMODE_MOVES; - /* Turn on MMX builtins for -msse. */ if (TARGET_SSE) { diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6d3ae80..cf1e883 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2833,8 +2833,8 @@ "ix86_expand_move (mode, operands); DONE;") (define_insn "*movtf_internal" - [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") - (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r ,?o") + (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { @@ -2890,24 +2890,19 @@ (const_string "TI"))] (const_string "DI")))]) -(define_split - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "general_operand" ""))] - "reload_completed - && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - +;; Possible store forwarding (partial memory) stall in alternative 4. (define_insn "*movxf_internal" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r ,o") + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r ,!o") (match_operand:XF 1 "general_operand" "fm,f,G,Yx*roF,FYx*r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE || (optimize_function_for_size_p (cfun) - && standard_80387_constant_p (operands[1]) > 0) - || memory_operand (operands[0], XFmode))" + && standard_80387_constant_p (operands[1]) > 0 + && !memory_operand (operands[0], XFmode)) + || (!TARGET_MEMORY_MISMATCH_STALL + && memory_operand (operands[0], XFmode)))" { switch (which_alternative) { @@ -2918,8 +2913,10 @@ case 2: return standard_80387_constant_opcode (operands[1]); - case 3: case 4: + case 3: + case 4: return "#"; + default: gcc_unreachable (); } @@ -2927,25 +2924,11 @@ [(set_attr "type" "fmov,fmov,fmov,multi,multi") (set_attr "mode" "XF,XF,XF,SI,SI")]) -(define_split - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (match_operand:XF 1 "general_operand" ""))] - "reload_completed - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && ! (FP_REG_P (operands[0]) || - (GET_CODE (operands[0]) == SUBREG - && FP_REG_P (SUBREG_REG (operands[0])))) - && ! (FP_REG_P (operands[1]) || - (GET_CODE (operands[1]) == SUBREG - && FP_REG_P (SUBREG_REG (operands[1]))))" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - (define_insn "*movdf_internal_rex64" [(set (match_operand:DF 0 "nonimmediate_operand" - "=f,m,f,r ,m,!r,!m,Y2*x,Y2*x,Y2*x,m ,Yi,r ") + "=f,m,f,r ,m,r,!m,Y2*x,Y2*x,Y2*x,m ,Yi,r ") (match_operand:DF 1 "general_operand" - "fm,f,G,rm,r,F ,F ,C ,Y2*x,m ,Y2*x,r ,Yi"))] + "fm,f,G,rm,r,F,F ,C ,Y2*x,m ,Y2*x,r ,Yi"))] "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -3093,21 +3076,20 @@ ;; Possible store forwarding (partial memory) stall in alternative 4. (define_insn "*movdf_internal" [(set (match_operand:DF 0 "nonimmediate_operand" - "=f,m,f,Yd*r ,o ,Y2*x,Y2*x,Y2*x,m ") + "=f,m,f,Yd*r ,!o ,Y2*x,Y2*x,Y2*x,m ") (match_operand:DF 1 "general_operand" "fm,f,G,Yd*roF,FYd*r,C ,Y2*x,m ,Y2*x"))] "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE - || (!TARGET_INTEGER_DFMODE_MOVES + || (optimize_function_for_size_p (cfun) && ((!(TARGET_SSE2 && TARGET_SSE_MATH) && standard_80387_constant_p (operands[1]) > 0) || (TARGET_SSE2 && TARGET_SSE_MATH && standard_sse_constant_p (operands[1]))) && !memory_operand (operands[0], DFmode)) - || ((TARGET_INTEGER_DFMODE_MOVES - || !TARGET_MEMORY_MISMATCH_STALL) + || (!TARGET_MEMORY_MISMATCH_STALL && memory_operand (operands[0], DFmode)))" { switch (which_alternative) @@ -3228,20 +3210,6 @@ ] (const_string "DF")))]) -(define_split - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (match_operand:DF 1 "general_operand" ""))] - "reload_completed - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && ! (ANY_FP_REG_P (operands[0]) || - (GET_CODE (operands[0]) == SUBREG - && ANY_FP_REG_P (SUBREG_REG (operands[0])))) - && ! (ANY_FP_REG_P (operands[1]) || - (GET_CODE (operands[1]) == SUBREG - && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - (define_insn "*movsf_internal" [(set (match_operand:SF 0 "nonimmediate_operand" "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") @@ -3344,31 +3312,19 @@ [(set (match_operand 0 "register_operand" "") (match_operand 1 "memory_operand" ""))] "reload_completed - && MEM_P (operands[1]) && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == DFmode || GET_MODE (operands[0]) == SFmode) + && ANY_FP_REGNO_P (REGNO (operands[0])) && (operands[2] = find_constant_src (insn))" [(set (match_dup 0) (match_dup 2))] { rtx c = operands[2]; - rtx r = operands[0]; - - if (GET_CODE (r) == SUBREG) - r = SUBREG_REG (r); + int r = REGNO (operands[0]); - if (SSE_REG_P (r)) - { - if (!standard_sse_constant_p (c)) - FAIL; - } - else if (FP_REG_P (r)) - { - if (standard_80387_constant_p (c) < 1) - FAIL; - } - else if (MMX_REG_P (r)) + if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c)) + || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1)) FAIL; }) @@ -3376,31 +3332,18 @@ [(set (match_operand 0 "register_operand" "") (float_extend (match_operand 1 "memory_operand" "")))] "reload_completed - && MEM_P (operands[1]) && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == DFmode - || GET_MODE (operands[0]) == SFmode) + || GET_MODE (operands[0]) == DFmode) + && ANY_FP_REGNO_P (REGNO (operands[0])) && (operands[2] = find_constant_src (insn))" [(set (match_dup 0) (match_dup 2))] { rtx c = operands[2]; - rtx r = operands[0]; - - if (GET_CODE (r) == SUBREG) - r = SUBREG_REG (r); + int r = REGNO (operands[0]); - if (SSE_REG_P (r)) - { - if (!standard_sse_constant_p (c)) - FAIL; - } - else if (FP_REG_P (r)) - { - if (standard_80387_constant_p (c) < 1) - FAIL; - } - else if (MMX_REG_P (r)) + if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c)) + || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1)) FAIL; }) @@ -3424,6 +3367,17 @@ operands[1] = CONST1_RTX (mode); }) +(define_split + [(set (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "general_operand" ""))] + "reload_completed + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == DFmode) + && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + (define_insn "swapxf" [(set (match_operand:XF 0 "register_operand" "+f") (match_operand:XF 1 "register_operand" "+f")) @@ -16663,7 +16617,7 @@ [(set (match_operand:SWI 0 "push_operand" "") (match_operand:SWI 1 "memory_operand" "")) (match_scratch:SWI 2 "")] - "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ()) && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))]) @@ -16674,7 +16628,7 @@ [(set (match_operand:SF 0 "push_operand" "") (match_operand:SF 1 "memory_operand" "")) (match_scratch:SF 2 "r")] - "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ()) && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))]) @@ -16826,7 +16780,7 @@ [(match_dup 0) (match_operand:SI 1 "memory_operand" "")])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY" + "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())" [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 2)])) @@ -16839,7 +16793,7 @@ [(match_operand:SI 1 "memory_operand" "") (match_dup 0)])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY" + "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())" [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 2) (match_dup 0)])) @@ -16892,7 +16846,7 @@ [(match_dup 0) (match_operand:SI 1 "nonmemory_operand" "")])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) /* Do not split stack checking probes. */ && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" [(set (match_dup 2) (match_dup 0)) @@ -16908,7 +16862,7 @@ [(match_operand:SI 1 "nonmemory_operand" "") (match_dup 0)])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) /* Do not split stack checking probes. */ && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" [(set (match_dup 2) (match_dup 0)) -- 2.7.4