* config/i386/sse.md (round<mode>2_vec_pack_sfix): Optimize V2DFmode
author uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 16 Nov 2011 18:28:08 +0000 (18:28 +0000)
committer uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 16 Nov 2011 18:28:08 +0000 (18:28 +0000)
sequence for AVX.
(<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>): Ditto.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@181421 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/sse.md

index 218681a..dd03e1a 100644 (file)
@@ -1,3 +1,9 @@
+2011-11-16  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/sse.md (round<mode>2_vec_pack_sfix): Optimize V2DFmode
+       sequence for AVX.
+       (<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>): Ditto.
+
 2011-11-16  Venkataramanan Kumar  <venkataramanan.kumar@amd.com>
 
        * doc/invoke.texi: Document AMD bdver1 and btver1.
        the base reg is stored iff compiling for Thumb1.
 
 2011-11-16  Razya Ladelsky  <razya@il.ibm.com>
-       
+
        PR tree-optimization/49960
-       * tree-data-ref.c (initialize_data_dependence_relation): Add initializations. 
+       * tree-data-ref.c (initialize_data_dependence_relation): Add
+       initializations.
        Remove call to compute_self_dependence.
        (compute_affine_dependence): Remove the !DDR_SELF_REFERENCE condition.
-       (compute_self_dependence): Remove old code. Add call to compute_affine_dependence.
-       (compute_all_dependences): Remove call to compute_self_dependence. 
+       (compute_self_dependence): Remove old code. Add call to
+       compute_affine_dependence.
+       (compute_all_dependences): Remove call to compute_self_dependence.
        Add call to compute_affine_dependence.
 
 2011-11-16  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
index b8e821d..d04902b 100644 (file)
 {
   rtx tmp0, tmp1;
 
-  tmp0 = gen_reg_rtx (<MODE>mode);
-  tmp1 = gen_reg_rtx (<MODE>mode);
+  if (<MODE>mode == V2DFmode
+      && TARGET_AVX && !TARGET_PREFER_AVX128)
+    {
+      rtx tmp2 = gen_reg_rtx (V4DFmode);
 
-  emit_insn
-    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
-                                                      operands[3]));
-  emit_insn
-    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
-                                                      operands[3]));
-  emit_insn
-    (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+      tmp0 = gen_reg_rtx (V4DFmode);
+      tmp1 = force_reg (V2DFmode, operands[1]);
+
+      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
+      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
+      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
+    }
+  else
+    {
+      tmp0 = gen_reg_rtx (<MODE>mode);
+      tmp1 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn
+       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
+                                                         operands[3]));
+      emit_insn
+       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
+                                                         operands[3]));
+      emit_insn
+       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+    }
   DONE;
 })
 
 {
   rtx tmp0, tmp1;
 
-  tmp0 = gen_reg_rtx (<MODE>mode);
-  tmp1 = gen_reg_rtx (<MODE>mode);
+  if (<MODE>mode == V2DFmode
+      && TARGET_AVX && !TARGET_PREFER_AVX128)
+    {
+      rtx tmp2 = gen_reg_rtx (V4DFmode);
 
-  emit_insn (gen_round<mode>2 (tmp0, operands[1]));
-  emit_insn (gen_round<mode>2 (tmp1, operands[2]));
+      tmp0 = gen_reg_rtx (V4DFmode);
+      tmp1 = force_reg (V2DFmode, operands[1]);
 
-  emit_insn
-    (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
+      emit_insn (gen_roundv4df2 (tmp2, tmp0));
+      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
+    }
+  else
+    {
+      tmp0 = gen_reg_rtx (<MODE>mode);
+      tmp1 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
+      emit_insn (gen_round<mode>2 (tmp1, operands[2]));
+
+      emit_insn
+       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+    }
   DONE;
 })