* i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS.
authorhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 11 Sep 2007 11:38:05 +0000 (11:38 +0000)
committerhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 11 Sep 2007 11:38:05 +0000 (11:38 +0000)
(TARGET_INTER_UNIT_CONVERSIONS): New.
* i386.md (floatsi expanders): Remove redundant check for SImode
source; offload to memory when asked for.
(floatsisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse,
floatdisf2_mixed, floatdisf2_sse, floatdidf2_mixed, floatdidf2_sse):
Update conditions.
(floatsisf2_mixed_memory, floatsisf2_sse_memory,
floatsidf2_mixed_memory, floatsidf2_sse_memory,
floatdisf2_mixed_memory, floatdisf2_sse_memory,
floatdidf2_mixed_memory, floatdidf2_sse_memory): New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@128369 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md

index 1706076..cc96242 100644 (file)
@@ -1,5 +1,19 @@
 2007-09-11  Jan Hubicka <jh@suse.cz>
 
+       * i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS.
+       (TARGET_INTER_UNIT_CONVERSIONS): New.
+       * i386.md (floatsi expanders): Remove redundant check for SImode
+       source; offload to memory when asked for.
+       (floatsisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse,
+       floatdisf2_mixed, floatdisf2_sse, floatdidf2_mixed, floatdidf2_sse):
+       Update conditions.
+       (floatsisf2_mixed_memory, floatsisf2_sse_memory,
+       floatsidf2_mixed_memory, floatsidf2_sse_memory,
+       floatdisf2_mixed_memory, floatdisf2_sse_memory,
+       floatdidf2_mixed_memory, floatdidf2_sse_memory): New.
+
+2007-09-11  Jan Hubicka <jh@suse.cz>
+
        * toplev.c (process_options): all frontends now do unit-at-a-time.
        * cgraphunit.c: update comments.
        (cgraph_expand_function): call passmanager dirrectly; emit thunks.
index 57fb628..ecec205 100644 (file)
@@ -1376,6 +1376,9 @@ unsigned int ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_TUNE_INTER_UNIT_MOVES */
   ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
 
+  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
+  ~(m_AMDFAM10),
+
   /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
      than 4 branch instructions in the 16 byte window.  */
   m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
index a14c74b..451df2e 100644 (file)
@@ -259,6 +259,7 @@ enum ix86_tune_indices {
   X86_TUNE_SHIFT1,
   X86_TUNE_USE_FFREEP,
   X86_TUNE_INTER_UNIT_MOVES,
+  X86_TUNE_INTER_UNIT_CONVERSIONS,
   X86_TUNE_FOUR_JUMP_LIMIT,
   X86_TUNE_SCHEDULE,
   X86_TUNE_USE_BT,
@@ -336,6 +337,8 @@ extern unsigned int ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_SHIFT1          ix86_tune_features[X86_TUNE_SHIFT1]
 #define TARGET_USE_FFREEP      ix86_tune_features[X86_TUNE_USE_FFREEP]
 #define TARGET_INTER_UNIT_MOVES        ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
+#define TARGET_INTER_UNIT_CONVERSIONS\
+       ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
 #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
 #define TARGET_SCHEDULE                ix86_tune_features[X86_TUNE_SCHEDULE]
 #define TARGET_USE_BT          ix86_tune_features[X86_TUNE_USE_BT]
index 5b50274..0c625da 100644 (file)
   "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
   "
    /* When we use vector converts, we can't have input in memory.  */
-   if (GET_MODE (operands[0]) == DFmode && GET_MODE (operands[1]) == SImode
+   if (GET_MODE (operands[0]) == DFmode
        && TARGET_USE_VECTOR_CONVERTS && !optimize_size && TARGET_SSE_MATH
        && SSE_FLOAT_MODE_P (DFmode))
      operands[1] = force_reg (SImode, operands[1]);
-   
-   if (GET_MODE (operands[0]) == SFmode && GET_MODE (operands[1]) == SImode
-       && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH
-       && SSE_FLOAT_MODE_P (SFmode))
+   else if (GET_MODE (operands[0]) == SFmode
+            && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH
+            && SSE_FLOAT_MODE_P (SFmode))
      {
        /* When !flag_trapping_math, we handle SImode->SFmode vector
          conversions same way as SImode->DFmode.
           operands[1] = tmp;
         }
      }
+   /* Offload the operand of cvtsi2ss and cvtsi2sd into memory for
+      !TARGET_INTER_UNIT_CONVERSIONS.
+      It is necessary for the patterns to not accept non-memory operands,
+      as otherwise the offload would be optimized out later.  */
+   else if (!TARGET_INTER_UNIT_CONVERSIONS
+           && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+           && !optimize_size
+           && !MEM_P (operands[1]))
+     {
+       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
+       emit_move_insn (tmp, operands[1]);
+       operands[1] = tmp;
+     }
   ")
 
 (define_insn "*floatsisf2_mixed_vector"
   [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
        (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m")))]
   "TARGET_MIX_SSE_I387
-   && (!TARGET_USE_VECTOR_CONVERTS || optimize_size)"
+   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+       || optimize_size)"
   "@
    fild%z1\t%1
    #
    (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
+;; SImode -> SFmode conversion with the integer source restricted to
+;; memory, for mixed x87/SSE math.  Enabled only when
+;; TARGET_INTER_UNIT_CONVERSIONS is off and we are not optimizing for size.
+;; The first alternative emits fild, the second emits cvtsi2ss; both read
+;; operand 1 from memory.
+(define_insn "*floatsisf2_mixed_memory"
+  [(set (match_operand:SF 0 "register_operand" "=f,x")
+       (float:SF (match_operand:SI 1 "memory_operand" "m,m")))]
+  "TARGET_MIX_SSE_I387
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "@
+   fild%z1\t%1
+   cvtsi2ss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,sseicvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "*,double")
+   (set_attr "amdfam10_decode" "*,double")
+   (set_attr "fp_int_src" "true")])
+
 (define_insn "*floatsisf2_sse_vector_nointernunit"
   [(set (match_operand:SF 0 "register_operand" "=x")
        (float:SF (match_operand:SI 1 "memory_operand" "m")))]
   [(set (match_operand:SF 0 "register_operand" "=x,x")
        (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
   "TARGET_SSE_MATH
-   && (!TARGET_USE_VECTOR_CONVERTS || optimize_size)"
+   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+       || optimize_size)"
   "cvtsi2ss\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "SF")
    (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
+;; SImode -> SFmode conversion via cvtsi2ss with the integer source
+;; restricted to memory.  Enabled for SSE math only when
+;; TARGET_INTER_UNIT_CONVERSIONS is off and we are not optimizing for size.
+(define_insn "*floatsisf2_sse_memory"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (float:SF (match_operand:SI 1 "memory_operand" "m")))]
+  "TARGET_SSE_MATH
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "cvtsi2ss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "double")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "fp_int_src" "true")])
+
 (define_insn "*floatsidf2_mixed_vector"
   [(set (match_operand:DF 0 "register_operand" "=x,f,f")
        (float:DF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
   [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x,!x")
        (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m,x")))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387
-    && (!TARGET_USE_VECTOR_CONVERTS || !optimize_size)"
+   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+       || optimize_size)"
   "@
    fild%z1\t%1
    #
    (set_attr "amdfam10_decode" "*,*,vector,double,double")
    (set_attr "fp_int_src" "true,true,true,true,false")])
 
+;; SImode -> DFmode conversion with the integer source restricted to
+;; memory, for mixed x87/SSE math with SSE2.  Enabled only when
+;; TARGET_INTER_UNIT_CONVERSIONS is off and we are not optimizing for size.
+;; The first alternative emits fild, the second emits cvtsi2sd; both read
+;; operand 1 from memory.
+(define_insn "*floatsidf2_mixed_memory"
+  [(set (match_operand:DF 0 "register_operand" "=f,x")
+       (float:DF (match_operand:SI 1 "memory_operand" "m,m")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "@
+   fild%z1\t%1
+   cvtsi2sd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "*,direct")
+   (set_attr "amdfam10_decode" "*,double")
+   (set_attr "fp_int_src" "true")])
+
 (define_insn "*floatsidf2_sse_vector"
   [(set (match_operand:DF 0 "register_operand" "=x")
        (float:DF (match_operand:SI 1 "register_operand" "x")))]
   [(set (match_operand:DF 0 "register_operand" "=x,x,!x")
        (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,m,x")))]
   "TARGET_SSE2 && TARGET_SSE_MATH
-   && (!TARGET_USE_VECTOR_CONVERTS || optimize_size)"
+   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+       || optimize_size)"
   "@
    cvtsi2sd\t{%1, %0|%0, %1}
    cvtsi2sd\t{%1, %0|%0, %1}
    (set_attr "amdfam10_decode" "vector,double,double")
    (set_attr "fp_int_src" "true")])
 
+;; SImode -> DFmode conversion via cvtsi2sd with the integer source
+;; restricted to memory.  This is the SSE-only counterpart of
+;; *floatsidf2_mixed_memory: it is enabled when
+;; TARGET_INTER_UNIT_CONVERSIONS is off and we are not optimizing for size,
+;; the case in which the floatsi expander offloads a non-memory operand 1
+;; to a stack slot.  The source constraint must be "m" to agree with the
+;; memory_operand predicate; an SSE register ("x") is not a valid integer
+;; source for cvtsi2sd.
+(define_insn "*floatsidf2_memory"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (float:DF (match_operand:SI 1 "memory_operand" "m")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "cvtsi2sd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "fp_int_src" "true")])
+
 (define_insn "*floatsi<mode>2_i387"
   [(set (match_operand:MODEF 0 "register_operand" "=f,f")
        (float:MODEF
   [(set (match_operand:SF 0 "register_operand" "")
        (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
   "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)"
-  "")
+{
+  if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
+      && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (SFmode)
+      && !optimize_size
+      && !MEM_P (operands[1]))
+    {
+       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
+       emit_move_insn (tmp, operands[1]);
+       operands[1] = tmp;
+    }
+})
 
 (define_insn "*floatdisf2_mixed"
   [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
        (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
-  "TARGET_64BIT && TARGET_MIX_SSE_I387"
+  "TARGET_64BIT && TARGET_MIX_SSE_I387
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
   "@
    fild%z1\t%1
    #
    (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
+;; DImode -> SFmode conversion with the integer source restricted to
+;; memory, for 64-bit mixed x87/SSE math.  Enabled only when
+;; TARGET_INTER_UNIT_CONVERSIONS is off and we are not optimizing for size.
+;; The first alternative emits fild, the second emits cvtsi2ss{q}; both
+;; read operand 1 from memory.  Named *_memory so it is distinguishable
+;; from the nonimmediate_operand *floatdisf2_mixed pattern.
+(define_insn "*floatdisf2_mixed_memory"
+  [(set (match_operand:SF 0 "register_operand" "=f,x")
+       (float:SF (match_operand:DI 1 "memory_operand" "m,m")))]
+  "TARGET_64BIT && TARGET_MIX_SSE_I387
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "@
+   fild%z1\t%1
+   cvtsi2ss{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,sseicvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "*,double")
+   (set_attr "amdfam10_decode" "*,double")
+   (set_attr "fp_int_src" "true")])
+
 (define_insn "*floatdisf2_sse"
   [(set (match_operand:SF 0 "register_operand" "=x,x")
        (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_64BIT && TARGET_SSE_MATH"
+  "TARGET_64BIT && TARGET_SSE_MATH
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
   "cvtsi2ss{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "SF")
    (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
+;; DImode -> SFmode conversion via cvtsi2ss{q} with the integer source
+;; restricted to memory.  Enabled for 64-bit SSE math only when
+;; TARGET_INTER_UNIT_CONVERSIONS is off and we are not optimizing for size.
+(define_insn "*floatdisf2_memory"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (float:SF (match_operand:DI 1 "memory_operand" "m")))]
+  "TARGET_64BIT && TARGET_SSE_MATH
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "cvtsi2ss{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "double")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "fp_int_src" "true")])
+
 (define_expand "floatdidf2"
   [(set (match_operand:DF 0 "register_operand" "")
        (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
       ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
       DONE;
     }
+  if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
+      && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode)
+      && !optimize_size
+      && !MEM_P (operands[1]))
+    {
+       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
+       emit_move_insn (tmp, operands[1]);
+       operands[1] = tmp;
+    }
 })
 
 (define_insn "*floatdidf2_mixed"
   [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
        (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387"
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
   "@
    fild%z1\t%1
    #
    (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
+;; DImode -> DFmode conversion with the integer source restricted to
+;; memory, for 64-bit mixed x87/SSE math with SSE2.  Enabled only when
+;; TARGET_INTER_UNIT_CONVERSIONS is off and we are not optimizing for size.
+;; The first alternative emits fild, the second emits cvtsi2sd{q}; both
+;; read operand 1 from memory.
+(define_insn "*floatdidf2_mixed_memory"
+  [(set (match_operand:DF 0 "register_operand" "=f,x")
+       (float:DF (match_operand:DI 1 "memory_operand" "m,m")))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "@
+   fild%z1\t%1
+   cvtsi2sd{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "*,direct")
+   (set_attr "amdfam10_decode" "*,double")
+   (set_attr "fp_int_src" "true")])
+
 (define_insn "*floatdidf2_sse"
   [(set (match_operand:DF 0 "register_operand" "=x,x")
        (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
   "cvtsi2sd{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
    (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
+;; DImode -> DFmode conversion via cvtsi2sd{q} with the integer source
+;; restricted to memory.  Enabled for 64-bit SSE2 math only when
+;; TARGET_INTER_UNIT_CONVERSIONS is off and we are not optimizing for size.
+(define_insn "*floatdidf2_sse_memory"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (float:DF (match_operand:DI 1 "memory_operand" "m")))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "cvtsi2sd{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "fp_int_src" "true")])
+
 (define_insn "*floatdi<mode>2_i387"
   [(set (match_operand:MODEF 0 "register_operand" "=f,f")
        (float:MODEF
          (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387"
+  "TARGET_80387
+   && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))"
   "@
    fild%z1\t%1
    #"