From 00fcb892ccf023d378e45b71b93b62c8e5399b60 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 29 Apr 2013 13:00:10 +0200 Subject: [PATCH] re PR target/54349 (_mm_cvtsi128_si64 unnecessary stores value at stack) PR target/54349 * config/i386/i386.h (enum ix86_tune_indices) <X86_TUNE_INTER_UNIT_MOVES_TO_VEC, X86_TUNE_INTER_UNIT_MOVES_FROM_VEC>: New, split from X86_TUNE_INTER_UNIT_MOVES. <X86_TUNE_INTER_UNIT_MOVES>: Remove. (TARGET_INTER_UNIT_MOVES_TO_VEC): New define. (TARGET_INTER_UNIT_MOVES_FROM_VEC): Ditto. (TARGET_INTER_UNIT_MOVES): Remove. * config/i386/i386.c (initial_ix86_tune_features): Update. Disable X86_TUNE_INTER_UNIT_MOVES_FROM_VEC for m_ATHLON_K8 only. (ix86_expand_convert_uns_didf_sse): Use TARGET_INTER_UNIT_MOVES_TO_VEC instead of TARGET_INTER_UNIT_MOVES. (ix86_expand_vector_init_one_nonzero): Ditto. (ix86_expand_vector_init_interleave): Ditto. (inline_secondary_memory_needed): Return true for moves from SSE class registers for !TARGET_INTER_UNIT_MOVES_FROM_VEC targets and for moves to SSE class registers for !TARGET_INTER_UNIT_MOVES_TO_VEC targets. * config/i386/constraints.md (Yi, Ym): Depend on TARGET_INTER_UNIT_MOVES_TO_VEC. (Yj, Yn): New constraints. * config/i386/i386.md (*movdi_internal): Change constraints of operand 1 from Yi to Yj and from Ym to Yn. (*movsi_internal): Ditto. (*movdf_internal): Ditto. (*movsf_internal): Ditto. (*float2_1): Use TARGET_INTER_UNIT_MOVES_TO_VEC instead of TARGET_INTER_UNIT_MOVES. (*float2_1 splitters): Ditto. (floatdi2_i387_with_xmm): Ditto. (floatdi2_i387_with_xmm splitters): Ditto. * config/i386/sse.md (movdi_to_sse): Ditto. (sse2_stored): Change constraint of operand 1 from Yi to Yj. Use TARGET_INTER_UNIT_MOVES_FROM_VEC instead of TARGET_INTER_UNIT_MOVES. (sse_storeq_rex64): Change constraint of operand 1 from Yi to Yj. (sse_storeq_rex64 splitter): Use TARGET_INTER_UNIT_MOVES_FROM_VEC instead of TARGET_INTER_UNIT_MOVES. * config/i386/mmx.md (*mov_internal): Change constraint of operand 1 from Yi to Yj and from Ym to Yn. 
From-SVN: r198401 --- gcc/ChangeLog | 42 ++++++++++++++++++++++++++++++++++++++++++ gcc/config/i386/constraints.md | 22 ++++++++++++++++------ gcc/config/i386/i386.c | 17 ++++++++++------- gcc/config/i386/i386.h | 10 +++++++--- gcc/config/i386/i386.md | 22 +++++++++++----------- gcc/config/i386/mmx.md | 2 +- gcc/config/i386/sse.md | 10 +++++----- 7 files changed, 92 insertions(+), 33 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5f0e495..2ee4a0c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,45 @@ +2013-04-29 Uros Bizjak + + PR target/54349 + * config/i386/i386.h (enum ix86_tune_indices) + <X86_TUNE_INTER_UNIT_MOVES_TO_VEC, X86_TUNE_INTER_UNIT_MOVES_FROM_VEC>: + New, split from X86_TUNE_INTER_UNIT_MOVES. + <X86_TUNE_INTER_UNIT_MOVES>: Remove. + (TARGET_INTER_UNIT_MOVES_TO_VEC): New define. + (TARGET_INTER_UNIT_MOVES_FROM_VEC): Ditto. + (TARGET_INTER_UNIT_MOVES): Remove. + * config/i386/i386.c (initial_ix86_tune_features): Update. + Disable X86_TUNE_INTER_UNIT_MOVES_FROM_VEC for m_ATHLON_K8 only. + (ix86_expand_convert_uns_didf_sse): Use + TARGET_INTER_UNIT_MOVES_TO_VEC instead of TARGET_INTER_UNIT_MOVES. + (ix86_expand_vector_init_one_nonzero): Ditto. + (ix86_expand_vector_init_interleave): Ditto. + (inline_secondary_memory_needed): Return true for moves from SSE class + registers for !TARGET_INTER_UNIT_MOVES_FROM_VEC targets and for moves + to SSE class registers for !TARGET_INTER_UNIT_MOVES_TO_VEC targets. + * config/i386/constraints.md (Yi, Ym): Depend on + TARGET_INTER_UNIT_MOVES_TO_VEC. + (Yj, Yn): New constraints. + * config/i386/i386.md (*movdi_internal): Change constraints of + operand 1 from Yi to Yj and from Ym to Yn. + (*movsi_internal): Ditto. + (*movdf_internal): Ditto. + (*movsf_internal): Ditto. + (*float2_1): Use + TARGET_INTER_UNIT_MOVES_TO_VEC instead of TARGET_INTER_UNIT_MOVES. + (*float2_1 splitters): Ditto. + (floatdi2_i387_with_xmm): Ditto. + (floatdi2_i387_with_xmm splitters): Ditto. + * config/i386/sse.md (movdi_to_sse): Ditto. + (sse2_stored): Change constraint of operand 1 from Yi to Yj. 
+ Use TARGET_INTER_UNIT_MOVES_FROM_VEC instead of + TARGET_INTER_UNIT_MOVES. + (sse_storeq_rex64): Change constraint of operand 1 from Yi to Yj. + (sse_storeq_rex64 splitter): Use TARGET_INTER_UNIT_MOVES_FROM_VEC + instead of TARGET_INTER_UNIT_MOVES. + * config/i386/mmx.md (*mov_internal): Change constraint of + operand 1 from Yi to Yj and from Ym to Yn. + 2013-04-29 James Greenhalgh * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi_): New. diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index ec7c856..6cb53b8 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -87,8 +87,10 @@ ;; We use the Y prefix to denote any number of conditional register sets: ;; z First SSE register. -;; i SSE2 inter-unit moves enabled -;; m MMX inter-unit moves enabled +;; i SSE2 inter-unit moves to SSE register enabled +;; j SSE2 inter-unit moves from SSE register enabled +;; m MMX inter-unit moves to MMX register enabled +;; n MMX inter-unit moves from MMX register enabled ;; a Integer register when zero extensions with AND are disabled ;; p Integer register when TARGET_PARTIAL_REG_STALL is disabled ;; d Integer register when integer DFmode moves are enabled @@ -99,12 +101,20 @@ "First SSE register (@code{%xmm0}).") (define_register_constraint "Yi" - "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES ? SSE_REGS : NO_REGS" - "@internal Any SSE register, when SSE2 and inter-unit moves are enabled.") + "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC ? SSE_REGS : NO_REGS" + "@internal Any SSE register, when SSE2 and inter-unit moves to vector registers are enabled.") + +(define_register_constraint "Yj" + "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? SSE_REGS : NO_REGS" + "@internal Any SSE register, when SSE2 and inter-unit moves from vector registers are enabled.") (define_register_constraint "Ym" - "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? 
MMX_REGS : NO_REGS" - "@internal Any MMX register, when inter-unit moves are enabled.") + "TARGET_MMX && TARGET_INTER_UNIT_MOVES_TO_VEC ? MMX_REGS : NO_REGS" + "@internal Any MMX register, when inter-unit moves to vector registers are enabled.") + +(define_register_constraint "Yn" + "TARGET_MMX && TARGET_INTER_UNIT_MOVES_FROM_VEC ? MMX_REGS : NO_REGS" + "@internal Any MMX register, when inter-unit moves from vector registers are enabled.") (define_register_constraint "Yp" "TARGET_PARTIAL_REG_STALL ? NO_REGS : GENERAL_REGS" diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 06aeecf..7a58a76 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1931,9 +1931,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_USE_FFREEP */ m_AMD_MULTIPLE, - /* X86_TUNE_INTER_UNIT_MOVES */ + /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC */ ~(m_AMD_MULTIPLE | m_GENERIC), + /* X86_TUNE_INTER_UNIT_MOVES_FROM_VEC */ + ~m_ATHLON_K8, + /* X86_TUNE_INTER_UNIT_CONVERSIONS */ ~(m_AMDFAM10 | m_BDVER ), @@ -17867,7 +17870,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input) rtx x; int_xmm = gen_reg_rtx (V4SImode); - if (TARGET_INTER_UNIT_MOVES) + if (TARGET_INTER_UNIT_MOVES_TO_VEC) emit_insn (gen_movdi_to_sse (int_xmm, input)); else if (TARGET_SSE_SPLIT_REGS) { @@ -33668,7 +33671,8 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, /* If the target says that inter-unit moves are more expensive than moving through memory, then don't generate them. */ - if (!TARGET_INTER_UNIT_MOVES) + if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC) + || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC)) return true; /* Between SSE and general, we have moves no larger than word size. */ @@ -35891,9 +35895,8 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, /* For SSE4.1, we normally use vector set. 
But if the second element is zero and inter-unit moves are OK, we use movq instead. */ - use_vector_set = (TARGET_64BIT - && TARGET_SSE4_1 - && !(TARGET_INTER_UNIT_MOVES + use_vector_set = (TARGET_64BIT && TARGET_SSE4_1 + && !(TARGET_INTER_UNIT_MOVES_TO_VEC && one_var == 0)); break; case V16QImode: @@ -36428,7 +36431,7 @@ half: /* Don't use ix86_expand_vector_init_interleave if we can't move from GPR to SSE register directly. */ - if (!TARGET_INTER_UNIT_MOVES) + if (!TARGET_INTER_UNIT_MOVES_TO_VEC) break; n = GET_MODE_NUNITS (mode); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d0f5f6f..6601567 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -304,7 +304,8 @@ enum ix86_tune_indices { X86_TUNE_EPILOGUE_USING_MOVE, X86_TUNE_SHIFT1, X86_TUNE_USE_FFREEP, - X86_TUNE_INTER_UNIT_MOVES, + X86_TUNE_INTER_UNIT_MOVES_TO_VEC, + X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, X86_TUNE_INTER_UNIT_CONVERSIONS, X86_TUNE_FOUR_JUMP_LIMIT, X86_TUNE_SCHEDULE, @@ -395,8 +396,11 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_EPILOGUE_USING_MOVE] #define TARGET_SHIFT1 ix86_tune_features[X86_TUNE_SHIFT1] #define TARGET_USE_FFREEP ix86_tune_features[X86_TUNE_USE_FFREEP] -#define TARGET_INTER_UNIT_MOVES ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES] -#define TARGET_INTER_UNIT_CONVERSIONS\ +#define TARGET_INTER_UNIT_MOVES_TO_VEC \ + ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES_TO_VEC] +#define TARGET_INTER_UNIT_MOVES_FROM_VEC \ + ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES_FROM_VEC] +#define TARGET_INTER_UNIT_CONVERSIONS \ ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] #define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE] diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 759c867..f6ffc01 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1846,7 +1846,7 @@ [(set (match_operand:DI 0 
"nonimmediate_operand" "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?*Yi,?*Ym,?*Yi") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Ym,r ,C ,*x,m ,*x,*Yi,r ,*Yi ,*Ym"))] + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*x,m ,*x,*Yj,r ,*Yj ,*Yn"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -1994,7 +1994,7 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?*Yi") (match_operand:SI 1 "general_operand" - "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yi,r"))] + "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2662,7 +2662,7 @@ [(set (match_operand:DF 0 "nonimmediate_operand" "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,x,x,x,m,*x,*x,*x,m ,r ,Yi") (match_operand:DF 1 "general_operand" - "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,Yi,r"))] + "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,Yj,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2831,7 +2831,7 @@ [(set (match_operand:SF 0 "nonimmediate_operand" "=Yf*f,m ,Yf*f,?r ,?m,x,x,x,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym") (match_operand:SF 1 "general_operand" - "Yf*fm,Yf*f,G ,rmF,rF,C,x,m,x,Yi,r ,*y ,m ,*y,*Ym,r"))] + "Yf*fm,Yf*f,G ,rmF,rF,C,x,m,x,Yj,r ,*y ,m ,*y,*Yn,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -4424,7 +4424,7 @@ /* Avoid store forwarding (partial memory) stall penalty by passing DImode value through XMM registers. 
*/ if (mode == DImode && !TARGET_64BIT - && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC && optimize_function_for_speed_p (cfun)) { emit_insn (gen_floatdi2_i387_with_xmm (operands[0], @@ -4601,7 +4601,7 @@ if (GET_CODE (op1) == SUBREG) op1 = SUBREG_REG (op1); - if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES_TO_VEC) { operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); emit_insn (gen_sse2_loadld (operands[4], @@ -4671,7 +4671,7 @@ if (GENERAL_REG_P (op1)) { operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); - if (TARGET_INTER_UNIT_MOVES) + if (TARGET_INTER_UNIT_MOVES_TO_VEC) emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), operands[1])); else @@ -4870,7 +4870,7 @@ (clobber (match_scratch:V4SI 4 "=X,x")) (clobber (match_operand:DI 2 "memory_operand" "=X,m"))] "TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC && !TARGET_64BIT && optimize_function_for_speed_p (cfun)" "#" [(set_attr "type" "multi") @@ -4885,7 +4885,7 @@ (clobber (match_scratch:V4SI 4)) (clobber (match_operand:DI 2 "memory_operand"))] "TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC && !TARGET_64BIT && optimize_function_for_speed_p (cfun) && reload_completed" [(set (match_dup 2) (match_dup 3)) @@ -4910,14 +4910,14 @@ (clobber (match_scratch:V4SI 4)) (clobber (match_operand:DI 2 "memory_operand"))] "TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC && !TARGET_64BIT && optimize_function_for_speed_p (cfun) && reload_completed" [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) ;; Avoid store forwarding (partial memory) stall penalty by extending ;; 
SImode value to DImode through XMM register instead of pushing two -;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES +;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC ;; targets benefit from this optimization. Also note that fild ;; loads from memory only. diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index fb75d49..4911cb2 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -80,7 +80,7 @@ [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!Ym,x,x,x,m,*x,*x,*x,m ,r ,Yi,!Ym,*Yi") (match_operand:MMXMODE 1 "vector_move_operand" - "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Ym,r ,C,x,m,x,C ,*x,m ,*x,Yi,r ,*Yi,!Ym"))] + "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Yn,r ,C,x,m,x,C ,*x,m ,*x,Yj,r ,*Yj,!Yn"))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f630303e..354d4c9 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -523,7 +523,7 @@ [(set (match_operand:V4SI 0 "register_operand" "=?x,x") (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0)) (clobber (match_scratch:V4SI 2 "=&x,X"))])] - "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES" + "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC" "#" "&& reload_completed" [(const_int 0)] @@ -7360,12 +7360,12 @@ (define_insn_and_split "sse2_stored" [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r") (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x,Yi") + (match_operand:V4SI 1 "register_operand" "x,Yj") (parallel [(const_int 0)])))] "TARGET_SSE" "#" "&& reload_completed - && (TARGET_INTER_UNIT_MOVES + && (TARGET_INTER_UNIT_MOVES_FROM_VEC || MEM_P (operands [0]) || !GENERAL_REGNO_P (true_regnum (operands [0])))" [(set (match_dup 0) (match_dup 1))] @@ -7397,7 +7397,7 @@ (define_insn "*sse2_storeq_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r") (vec_select:DI - 
(match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o") + (match_operand:V2DI 1 "nonimmediate_operand" "x,Yj,o") (parallel [(const_int 0)])))] "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ @@ -7422,7 +7422,7 @@ (parallel [(const_int 0)])))] "TARGET_SSE && reload_completed - && (TARGET_INTER_UNIT_MOVES + && (TARGET_INTER_UNIT_MOVES_FROM_VEC || MEM_P (operands [0]) || !GENERAL_REGNO_P (true_regnum (operands [0])))" [(set (match_dup 0) (match_dup 1))] -- 2.7.4