x86-tune.def (DEF_TUNE): Remove m_CORE_ALL.
author Wei Mi <wmi@google.com>
Tue, 1 Oct 2013 23:32:55 +0000 (23:32 +0000)
committer Wei Mi <wmi@gcc.gnu.org>
Tue, 1 Oct 2013 23:32:55 +0000 (23:32 +0000)
2013-10-01  Wei Mi  <wmi@google.com>

        * config/i386/x86-tune.def (DEF_TUNE): Remove
        m_CORE_ALL.
        * config/i386/i386.md: Add define_peephole2 to
        break partial reg stall for cvtss2sd/cvtsd2ss.

From-SVN: r203095

gcc/ChangeLog
gcc/config/i386/i386.md
gcc/config/i386/x86-tune.def

index 7ff5bd2..e0b30a3 100644 (file)
@@ -1,3 +1,10 @@
+2013-10-01  Wei Mi  <wmi@google.com>
+
+       * config/i386/x86-tune.def (DEF_TUNE): Remove
+       m_CORE_ALL.
+       * config/i386/i386.md: Add define_peephole2 to
+       break partial reg stall for cvtss2sd/cvtsd2ss.
+
 2013-10-01  Joern Rennecke  <joern.rennecke@embecosm.com>
 
        * config/arc/arc.c (pass_arc_ifcvt::clone):
index 03b3842..7368719 100644 (file)
   emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
 })
 
+;; Break partial reg stall for cvtsd2ss.
+;;
+;; Matches a scalar DF -> SF float_truncate whose destination is an SSE
+;; register and rewrites it as the V4SF vec_merge form below, after
+;; emitting a move that sets the whole destination vector to zero.
+;; Applied only when TARGET_SSE2 and TARGET_SSE_MATH hold, the
+;; TARGET_SSE_PARTIAL_REG_DEPENDENCY tuning is enabled and the function
+;; is being optimized for speed.
+;; The source must not be the same SSE register as the destination:
+;; the zeroing move would overwrite it before the conversion reads it.
+;; NOTE(review): operand 1 is a nonimmediate_operand, so it may be a
+;; MEM; confirm that the V2DF view created for it below is what the
+;; matching insn pattern expects.
+
+(define_peephole2
+  [(set (match_operand:SF 0 "register_operand")
+        (float_truncate:SF
+         (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+   && optimize_function_for_speed_p (cfun)
+   && SSE_REG_P (operands[0])
+   && (!SSE_REG_P (operands[1])
+       || REGNO (operands[0]) != REGNO (operands[1]))"
+  [(set (match_dup 0)
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF
+           (float_truncate:V2SF
+             (match_dup 1)))
+         (match_dup 0)
+         (const_int 1)))]
+{
+  /* Re-express both scalar operands as vector-mode subregs at byte
+     offset 0, so that they fit the V4SF/V2DF replacement pattern.  */
+  operands[0] = simplify_gen_subreg (V4SFmode, operands[0],
+                                    SFmode, 0);
+  operands[1] = simplify_gen_subreg (V2DFmode, operands[1],
+                                    DFmode, 0);
+  /* Zero the full destination vector; this insn is emitted ahead of
+     the replacement pattern, which then merges into the zeroed
+     register.  */
+  emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
+})
+
+;; Break partial reg stall for cvtss2sd.
+;;
+;; Matches a scalar SF -> DF float_extend whose destination is an SSE
+;; register and rewrites it as the V2DF vec_merge form below, after
+;; emitting a move that sets the whole destination vector to zero.
+;; Applied only when TARGET_SSE2 and TARGET_SSE_MATH hold, the
+;; TARGET_SSE_PARTIAL_REG_DEPENDENCY tuning is enabled and the function
+;; is being optimized for speed.
+;; The source must not be the same SSE register as the destination:
+;; the zeroing move would overwrite it before the conversion reads it.
+;; NOTE(review): operand 1 is a nonimmediate_operand, so it may be a
+;; MEM; confirm that the V4SF view created for it below (wider than
+;; the original SF access) is what the matching insn pattern expects.
+
+(define_peephole2
+  [(set (match_operand:DF 0 "register_operand")
+        (float_extend:DF
+          (match_operand:SF 1 "nonimmediate_operand")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+   && optimize_function_for_speed_p (cfun)
+   && SSE_REG_P (operands[0])
+   && (!SSE_REG_P (operands[1])
+       || REGNO (operands[0]) != REGNO (operands[1]))"
+  [(set (match_dup 0)
+        (vec_merge:V2DF
+          (float_extend:V2DF
+            (vec_select:V2SF
+              (match_dup 1)
+              (parallel [(const_int 0) (const_int 1)])))
+          (match_dup 0)
+          (const_int 1)))]
+{
+  /* Re-express both scalar operands as vector-mode subregs at byte
+     offset 0, so that they fit the V2DF/V4SF replacement pattern.  */
+  operands[0] = simplify_gen_subreg (V2DFmode, operands[0],
+                                    DFmode, 0);
+  operands[1] = simplify_gen_subreg (V4SFmode, operands[1],
+                                    SFmode, 0);
+  /* Zero the full destination vector; this insn is emitted ahead of
+     the replacement pattern, which then merges into the zeroed
+     register.  */
+  emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
+})
+
 ;; Avoid store forwarding (partial memory) stall penalty
 ;; by passing DImode value through XMM registers.  */
 
index c3cf00f..6b0a593 100644 (file)
@@ -346,7 +346,7 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
    from FP to FP.  This form of instructions avoids partial write to the
    destination.  */
 DEF_TUNE (X86_TUNE_USE_VECTOR_FP_CONVERTS, "use_vector_fp_converts",
-          m_CORE_ALL | m_AMDFAM10 | m_GENERIC)
+          m_AMDFAM10 | m_GENERIC)
 
 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
    from integer to FP. */