re PR target/57954 (AVX missing vxorps (zeroing) before vcvtsi2s %edx, slow down...
author     Uros Bizjak <uros@gcc.gnu.org>
           Mon, 29 Jul 2013 11:17:51 +0000 (13:17 +0200)
committer  Uros Bizjak <uros@gcc.gnu.org>
           Mon, 29 Jul 2013 11:17:51 +0000 (13:17 +0200)
2013-07-29  Uros Bizjak  <ubizjak@gmail.com>

* config/i386/i386.md (float post-reload splitters): Do not check
for subregs of SSE registers.

2013-07-29  Uros Bizjak  <ubizjak@gmail.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

PR target/57954
PR target/57988
* config/i386/i386.md (post-reload splitter
to avoid partial SSE reg dependency stalls): New pattern.

From-SVN: r201308
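
For context: PR target/57954 reports that cvtsi2ss/cvtsi2sd (and their AVX forms) merge the converted value into the low element of the destination XMM register and leave the upper elements untouched, so the conversion carries a false dependency on whatever instruction last wrote that register. The new TARGET_SSE_PARTIAL_REG_DEPENDENCY splitter breaks that chain by zeroing the whole vector register first and expressing the conversion as a vec_merge into it. A minimal sketch of the intended effect follows; the function name, compiler flags, and exact register allocation are illustrative, not taken from the PR.

/* Sketch only: int-to-float conversion compiled with SSE math
   (e.g. -O2 -mavx); "itof" is a made-up name for illustration.  */
float itof (int x)
{
  return x;
}

/* Before this fix (sketch):
       vcvtsi2ss  %edi, %xmm0, %xmm0    # false dependency on old %xmm0

   After, on TARGET_SSE_PARTIAL_REG_DEPENDENCY tunings (sketch):
       vxorps     %xmm0, %xmm0, %xmm0   # break dependency chain
       vcvtsi2ss  %edi, %xmm0, %xmm0                                  */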

gcc/ChangeLog
gcc/config/i386/i386.md

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 26ec34a..e30f63b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2013-07-29  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.md (float post-reload splitters): Do not check
+       for subregs of SSE registers.
+
+2013-07-29  Uros Bizjak  <ubizjak@gmail.com>
+           H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/57954
+       PR target/57988
+       * config/i386/i386.md (post-reload splitter
+       to avoid partial SSE reg dependency stalls): New pattern.
+
 2013-07-29  Dominik Vogt  <vogt@linux.vnet.ibm.com>
 
        * config/s390/s390.md ("movcc"): Swap load and store instructions.
        * config/aarch64/iterators.md: Add attributes rtn and vas.
 
 2013-07-26  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
-            Richard Earnshaw  <richard.earnshaw@arm.com>
+           Richard Earnshaw  <richard.earnshaw@arm.com>
 
        * combine.c (simplify_comparison): Re-canonicalize operands
-       where appropriate.      
+       where appropriate.
        * config/arm/arm.md (movcond_addsi): New splitter.
 
 2013-07-25  Sterling Augustine  <saugustine@google.com>
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c67ed31..84515ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
    (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
    && TARGET_INTER_UNIT_CONVERSIONS
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 0) (float:MODEF (match_dup 1)))])
 
 (define_split
    (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
    && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (float:MODEF (match_dup 2)))])
 
    (clobber (match_operand:SI 2 "memory_operand"))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(const_int 0)]
 {
   rtx op1 = operands[1];
    (clobber (match_operand:SI 2 "memory_operand"))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(const_int 0)]
 {
   operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
        (float:MODEF (match_operand:SI 1 "register_operand")))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(const_int 0)]
 {
   rtx op1 = operands[1];
        (float:MODEF (match_operand:SI 1 "memory_operand")))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(const_int 0)]
 {
   operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
    (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
    && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 0) (float:MODEF (match_dup 1)))])
 
 (define_insn "*float<SWI48:mode><MODEF:mode>2_sse_nointerunit"
    (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
    && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (float:MODEF (match_dup 2)))])
 
        (float:MODEF (match_operand:SWI48 1 "memory_operand")))
    (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 0) (float:MODEF (match_dup 1)))])
 
 (define_insn "*float<SWI48x:mode><X87MODEF:mode>2_i387_with_temp"
    && reload_completed"
   [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
 
+;; Avoid partial SSE register dependency stalls
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand")
+       (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+   && optimize_function_for_speed_p (cfun)
+   && reload_completed && SSE_REG_P (operands[0])"
+  [(set (match_dup 0)
+       (vec_merge:<ssevecmode>
+         (vec_duplicate:<ssevecmode>
+           (float:MODEF (match_dup 1)))
+         (match_dup 0)
+         (const_int 1)))]
+{
+  operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
+})
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand")
+       (float:MODEF (match_operand:DI 1 "nonimmediate_operand")))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+   && optimize_function_for_speed_p (cfun)
+   && reload_completed && SSE_REG_P (operands[0])"
+  [(set (match_dup 0)
+       (vec_merge:<ssevecmode>
+         (vec_duplicate:<ssevecmode>
+           (float:MODEF (match_dup 1)))
+         (match_dup 0)
+         (const_int 1)))]
+{
+  operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
+})
+
 ;; Avoid store forwarding (partial memory) stall penalty
 ;; by passing DImode value through XMM registers.  */