i386.h (enum ix86_tune_indices): Add X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE.
authorUros Bizjak <uros@gcc.gnu.org>
Wed, 19 Dec 2012 16:04:11 +0000 (17:04 +0100)
committerUros Bizjak <uros@gcc.gnu.org>
Wed, 19 Dec 2012 16:04:11 +0000 (17:04 +0100)
* config/i386/i386.h (enum ix86_tune_indices): Add
X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE.
(TARGET_AVOID_MEM_OPND_FOR_CMOVE): New define.
* config/i386/i386.c (initial_ix86_tune_features)
<X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE>: Initialize.
* config/i386/i386.md (splitters to avoid cmove memory operands): New.
(peephole2s to avoid cmove memory operands): New.

From-SVN: r194614

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md

index e60c4d9..be21ba9 100644 (file)
@@ -1,3 +1,14 @@
+2012-12-19  Uros Bizjak  <ubizjak@gmail.com>
+           Yuri Rumyantsev  <ysrumyan@gmail.com>
+
+       * config/i386/i386.h (enum ix86_tune_indices): Add
+       X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE.
+       (TARGET_AVOID_MEM_OPND_FOR_CMOVE): New define.
+       * config/i386/i386.c (initial_ix86_tune_features)
+       <X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE>: Initialize.
+       * config/i386/i386.md (splitters to avoid cmove memory operands): New.
+       (peephole2s to avoid cmove memory operands): New.
+
 2012-12-19  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        * config/arm/arm.md (f_minmaxs, f_minmaxd): New types.
        * combine.c (try_combine): Adjust to use the target hook.
        * config/alpha/alpha.h (CANONICALIZE_COMPARISON): Remove macro
        definition.
-       * config/alpha/alpha.c (alpha_canonicalize_comparison): New
-       function.
+       * config/alpha/alpha.c (alpha_canonicalize_comparison): New function.
        (TARGET_CANONICALIZE_COMPARISON): New macro definition.
        * config/arm/arm-protos.h (arm_canonicalize_comparison): Remove
        prototype.
-       * config/arm/arm.c (arm_canonicalize_comparison): Add new
-       parameter.
+       * config/arm/arm.c (arm_canonicalize_comparison): Add new parameter.
        (TARGET_CANONICALIZE_COMPARISON): New macro definition.
-       * config/arm/arm.h (CANONICALIZE_COMPARISON): Remove macro
-       definition.
+       * config/arm/arm.h (CANONICALIZE_COMPARISON): Remove macro definition.
        * config/s390/s390-protos.h (s390_canonicalize_comparison): Remove
        prototype.
-       * config/s390/s390.c (s390_canonicalize_comparison): Add new
-       parameter.
+       * config/s390/s390.c (s390_canonicalize_comparison): Add new parameter.
        (TARGET_CANONICALIZE_COMPARISON): New macro definition.
-       * config/s390/s390.h (CANONICALIZE_COMPARISON): Remove macro
-       definition.
-       * config/sh/sh-protos.h (sh_canonicalize_comparison): Remove
-       prototype.
+       * config/s390/s390.h (CANONICALIZE_COMPARISON): Remove macro definition.
+       * config/sh/sh-protos.h (sh_canonicalize_comparison): Remove prototype.
        * config/sh/sh.c (sh_canonicalize_comparison): Add new prototype.  New
        function overloading the old one.
        (TARGET_CANONICALIZE_COMPARISON): New macro definition.
-       * config/sh/sh.h (CANONICALIZE_COMPARISON): Remove macro
-       definition.
+       * config/sh/sh.h (CANONICALIZE_COMPARISON): Remove macro definition.
        * config/spu/spu.c (spu_canonicalize_comparison): New function.
        (TARGET_CANONICALIZE_COMPARISON): New macro definition.
-       * config/spu/spu.h (CANONICALIZE_COMPARISON): Remove macro
-       definition.
+       * config/spu/spu.h (CANONICALIZE_COMPARISON): Remove macro definition.
 
 2012-12-19  Jakub Jelinek  <jakub@redhat.com>
 
@@ -74,7 +77,8 @@
 2012-12-18  Jan Hubicka  <jh@suse.cz>
 
        PR tree-optimization/55683
-       * ipa-prop.c (try_make_edge_direct_virtual_call): Look into constants for binfo.
+       * ipa-prop.c (try_make_edge_direct_virtual_call): Look into constants
+       for binfo.
 
 2012-12-19  Terry Guo  <terry.guo@arm.com>
 
 
 2012-12-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
-       * config/arm/driver-arm.c (arm_cpu_table):
-       Add Cortex-A7.
+       * config/arm/driver-arm.c (arm_cpu_table): Add Cortex-A7.
 
 2012-12-18  Aldy Hernandez  <aldyh@redhat.com>
 
        gen_lowpart_if_possible.
        (gen_lowpart_no_emit_general): Remove prototype.
        * rtlhooks.c (gen_lowpart_no_emit_general): Removed.
-       * simplify-rtx.c (simplify_unary_operation_1, 
+       * simplify-rtx.c (simplify_unary_operation_1,
        simplify_binary_operation_1): Continue simplifying if
        rtl_hooks.gen_lowpart_no_emit returns NULL_RTX.
        * dwarf2out.c (mem_loc_descriptor) <case TRUNCATE>: Handle
index 69f44aa..b466a4f 100644 (file)
@@ -2026,7 +2026,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
 
   /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
      regs instead of memory.  */
-  m_COREI7 | m_CORE2I7
+  m_COREI7 | m_CORE2I7,
+
+  /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
+     a conditional move.  */
+  m_ATOM
 };
 
 /* Feature tests against the various architecture variations.  */
index 3ac3451..d2f535a 100644 (file)
@@ -331,6 +331,7 @@ enum ix86_tune_indices {
   X86_TUNE_REASSOC_INT_TO_PARALLEL,
   X86_TUNE_REASSOC_FP_TO_PARALLEL,
   X86_TUNE_GENERAL_REGS_SSE_SPILL,
+  X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE,
 
   X86_TUNE_LAST
 };
@@ -436,6 +437,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_REASSOC_FP_TO_PARALLEL]
 #define TARGET_GENERAL_REGS_SSE_SPILL \
        ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
+/* Tuning test: try to avoid memory operands for a conditional move.  */
+#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
+       ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
index 3846065..95a52cd 100644 (file)
   [(set_attr "type" "icmov")
    (set_attr "mode" "<MODE>")])
 
+;; Don't do conditional moves with memory inputs.  This splitter helps
+;; register starved x86_32 by forcing inputs into registers before reload.
+;; It matches a conditional move on the flags register in which at least
+;; one of the two value operands (2 and 3) is a MEM, and re-emits the same
+;; conditional move with every MEM operand first forced into a register.
+;; It only applies to 32-bit code (!TARGET_64BIT) when the tuning flag
+;; TARGET_AVOID_MEM_OPND_FOR_CMOVE is set, new pseudos can still be
+;; created and the insn is being optimized for speed.
+(define_split
+  [(set (match_operand:SWI248 0 "register_operand")
+       (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                              [(reg FLAGS_REG) (const_int 0)])
+         (match_operand:SWI248 2 "nonimmediate_operand")
+         (match_operand:SWI248 3 "nonimmediate_operand")))]
+  "!TARGET_64BIT && TARGET_CMOVE
+   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+   && (MEM_P (operands[2]) || MEM_P (operands[3]))
+   && can_create_pseudo_p ()
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 0)
+       (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
+{
+  /* Force each memory input into a register; inputs that are not MEMs
+     are left untouched.  */
+  if (MEM_P (operands[2]))
+    operands[2] = force_reg (<MODE>mode, operands[2]);
+  if (MEM_P (operands[3]))
+    operands[3] = force_reg (<MODE>mode, operands[3]);
+})
+
 (define_insn "*movqicc_noc"
   [(set (match_operand:QI 0 "register_operand" "=r,r")
        (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
    (set_attr "mode" "QI")])
 
 (define_split
-  [(set (match_operand 0 "register_operand")
-       (if_then_else (match_operator 1 "ix86_comparison_operator"
-                       [(reg FLAGS_REG) (const_int 0)])
-                     (match_operand 2 "register_operand")
-                     (match_operand 3 "register_operand")))]
+  [(set (match_operand:SWI12 0 "register_operand")
+       (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
+                             [(reg FLAGS_REG) (const_int 0)])
+                     (match_operand:SWI12 2 "register_operand")
+                     (match_operand:SWI12 3 "register_operand")))]
   "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
-   && (GET_MODE (operands[0]) == QImode
-       || GET_MODE (operands[0]) == HImode)
    && reload_completed"
   [(set (match_dup 0)
        (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
   operands[3] = gen_lowpart (SImode, operands[3]);
 })
 
+;; Don't do conditional moves with memory inputs.  This peephole matches
+;; a conditional move whose "then" arm is the destination register itself
+;; and whose "else" arm is a memory operand (operand 3).  Using a scratch
+;; register (operand 2), it first loads the memory operand into the
+;; scratch and then does the conditional move from that register.
+;; Enabled only when TARGET_AVOID_MEM_OPND_FOR_CMOVE is set and the insn
+;; is being optimized for speed.
+(define_peephole2
+  [(match_scratch:SWI248 2 "r")
+   (set (match_operand:SWI248 0 "register_operand")
+       (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                              [(reg FLAGS_REG) (const_int 0)])
+         (match_dup 0)
+         (match_operand:SWI248 3 "memory_operand")))]
+  "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+       (if_then_else:SWI248 (match_dup 1) (match_dup 0) (match_dup 2)))])
+
+;; Don't do conditional moves with memory inputs.  This peephole matches
+;; a conditional move whose "then" arm is a memory operand (operand 3)
+;; and whose "else" arm is the destination register itself.  Using a
+;; scratch register (operand 2), it first loads the memory operand into
+;; the scratch and then does the conditional move from that register.
+;; Enabled only when TARGET_AVOID_MEM_OPND_FOR_CMOVE is set and the insn
+;; is being optimized for speed.
+(define_peephole2
+  [(match_scratch:SWI248 2 "r")
+   (set (match_operand:SWI248 0 "register_operand")
+       (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                              [(reg FLAGS_REG) (const_int 0)])
+         (match_operand:SWI248 3 "memory_operand")
+         (match_dup 0)))]
+  "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+       (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 0)))])
+
 (define_expand "mov<mode>cc"
   [(set (match_operand:X87MODEF 0 "register_operand")
        (if_then_else:X87MODEF
   [(set_attr "type" "fcmov,fcmov,icmov,icmov")
    (set_attr "mode" "SF,SF,SI,SI")])
 
+;; Don't do conditional moves with memory inputs.  This splitter helps
+;; register starved x86_32 by forcing inputs into registers before reload.
+;; It is the MODEF counterpart of the integer conditional-move splitter:
+;; it matches a conditional move on the flags register in which at least
+;; one of the two value operands (2 and 3) is a MEM, and re-emits the same
+;; conditional move with every MEM operand first forced into a register.
+;; It only applies to 32-bit code (!TARGET_64BIT) with TARGET_80387 and
+;; TARGET_CMOVE, when the tuning flag TARGET_AVOID_MEM_OPND_FOR_CMOVE is
+;; set, new pseudos can still be created and the insn is being optimized
+;; for speed.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand")
+       (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator"
+                             [(reg FLAGS_REG) (const_int 0)])
+         (match_operand:MODEF 2 "nonimmediate_operand")
+         (match_operand:MODEF 3 "nonimmediate_operand")))]
+  "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+   && (MEM_P (operands[2]) || MEM_P (operands[3]))
+   && can_create_pseudo_p ()
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 0)
+       (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
+{
+  /* Force each memory input into a register; inputs that are not MEMs
+     are left untouched.  */
+  if (MEM_P (operands[2]))
+    operands[2] = force_reg (<MODE>mode, operands[2]);
+  if (MEM_P (operands[3]))
+    operands[3] = force_reg (<MODE>mode, operands[3]);
+})
+
+;; Don't do conditional moves with memory inputs.  This peephole matches
+;; a MODEF conditional move whose destination satisfies
+;; register_and_not_any_fp_reg_operand, whose "then" arm is the
+;; destination itself and whose "else" arm is a memory operand
+;; (operand 3).  Using a scratch register (operand 2, constraint "r"),
+;; it first loads the memory operand into the scratch and then does the
+;; conditional move from that register.  Enabled only when
+;; TARGET_AVOID_MEM_OPND_FOR_CMOVE is set and the insn is being optimized
+;; for speed.
+;; NOTE(review): DFmode is accepted only with TARGET_64BIT, presumably
+;; because the scratch is a general register -- confirm.
+(define_peephole2
+  [(match_scratch:MODEF 2 "r")
+   (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand")
+       (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
+                             [(reg FLAGS_REG) (const_int 0)])
+         (match_dup 0)
+         (match_operand:MODEF 3 "memory_operand")))]
+  "(<MODE>mode != DFmode || TARGET_64BIT)
+   && TARGET_80387 && TARGET_CMOVE
+   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+       (if_then_else:MODEF (match_dup 1) (match_dup 0) (match_dup 2)))])
+
+;; Don't do conditional moves with memory inputs.  This peephole matches
+;; a MODEF conditional move whose destination satisfies
+;; register_and_not_any_fp_reg_operand, whose "then" arm is a memory
+;; operand (operand 3) and whose "else" arm is the destination itself.
+;; Using a scratch register (operand 2, constraint "r"), it first loads
+;; the memory operand into the scratch and then does the conditional
+;; move from that register.  Enabled only when
+;; TARGET_AVOID_MEM_OPND_FOR_CMOVE is set and the insn is being optimized
+;; for speed.
+;; NOTE(review): DFmode is accepted only with TARGET_64BIT, presumably
+;; because the scratch is a general register -- confirm.
+(define_peephole2
+  [(match_scratch:MODEF 2 "r")
+   (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand")
+       (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
+                             [(reg FLAGS_REG) (const_int 0)])
+         (match_operand:MODEF 3 "memory_operand")
+         (match_dup 0)))]
+  "(<MODE>mode != DFmode || TARGET_64BIT)
+   && TARGET_80387 && TARGET_CMOVE
+   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+       (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 0)))])
+
 ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
 ;; the scalar versions to have only XMM registers as operands.