* config/i386/sse.md (sseunpackmode): New mode attribute.
author uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>
Sat, 16 Apr 2011 12:56:44 +0000 (12:56 +0000)
committer uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>
Sat, 16 Apr 2011 12:56:44 +0000 (12:56 +0000)
(ssepackmode): Ditto.
(vec_pack_trunc_<mode>): Macroize expander from
vec_pack_trunc_{v8hi,v4si,v2di} using VI248_128 mode iterator.
(vec_unpacks_lo_<mode>): Macroize expander from
vec_unpacks_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
(vec_unpacks_hi_<mode>): Macroize expander from
vec_unpacks_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
(vec_unpacku_lo_<mode>): Macroize expander from
vec_unpacku_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
(vec_unpacku_hi_<mode>): Macroize expander from
vec_unpacku_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
* config/i386/i386.c (ix86_expand_sse_unpack): Merge with
ix86_expand_sse4_unpack.
* config/i386/i386-protos.h (ix86_expand_sse4_unpack): Remove.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@172585 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.c
gcc/config/i386/sse.md

index 237f2fd..da5797e 100644 (file)
@@ -1,3 +1,21 @@
+2011-04-16  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/sse.md (sseunpackmode): New mode attribute.
+       (ssepackmode): Ditto.
+       (vec_pack_trunc_<mode>): Macroize expander from
+       vec_pack_trunc_{v8hi,v4si,v2di} using VI248_128 mode iterator.
+       (vec_unpacks_lo_<mode>): Macroize expander from
+       vec_unpacks_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
+       (vec_unpacks_hi_<mode>): Macroize expander from
+       vec_unpacks_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
+       (vec_unpacku_lo_<mode>): Macroize expander from
+       vec_unpacku_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
+       (vec_unpacku_hi_<mode>): Macroize expander from
+       vec_unpacku_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
+       * config/i386/i386.c (ix86_expand_sse_unpack): Merge with
+       ix86_expand_sse4_unpack.
+       * config/i386/i386-protos.h (ix86_expand_sse4_unpack): Remove.
+
 2011-04-16  Jan Hubicka  <jh@suse.cz>
 
        * cgraphbuild.c: Include ipa-inline.h.
@@ -10,7 +28,8 @@
        and disregard_inline_limits flags.
        (cgraph_global_info): Remove estimated_stack_size, stack_frame_offset,
        time, size, estimated_growth.
-       * ipa-cp.c (ipcp_versionable_function_p, ipcp_generate_summary): Update.
+       * ipa-cp.c (ipcp_versionable_function_p, ipcp_generate_summary):
+       Update.
        * cgraphunit.c (cgraph_decide_is_function_needed): Use
        DECL_DISREGARD_INLINE_LIMITS.
        (cgraph_analyze_function): Do not initialize
        * lto-cgraph.c (lto_output_node, input_overwrite_node): Do not stream
        inlinable, versionable and disregard_inline_limits.
        * ipa-inline.c (cgraph_clone_inlined_nodes, cgraph_mark_inline_edge,
-       cgraph_check_inline_limits, cgraph_default_inline_p, cgraph_edge_badness,
-       update_caller_keys, update_callee_keys, add_new_edges_to_heap): Update.
-       (cgraph_decide_inlining_of_small_function): Update; set CIF_FUNCTION_NOT_INLINABLE
-       for uninlinable functions.
+       cgraph_check_inline_limits, cgraph_default_inline_p,
+       cgraph_edge_badness, update_caller_keys, update_callee_keys,
+       add_new_edges_to_heap): Update.
+       (cgraph_decide_inlining_of_small_function): Update; set
+       CIF_FUNCTION_NOT_INLINABLE for uninlinable functions.
        (cgraph_decide_inlining, cgraph_edge_early_inlinable_p,
        cgraph_decide_inlining_incrementally): Update.
-       * ipa-inline.h (inline_summary): Add inlinable, versionable, disregard_inline_limits,
-       estimated_stack_size, stack_frame_offset, time, size and estimated_growth
-       parameters.
+       * ipa-inline.h (inline_summary): Add inlinable, versionable,
+       disregard_inline_limits, estimated_stack_size, stack_frame_offset,
+       time, size and estimated_growth parameters.
        (estimate_edge_growth): Update.
        (initialize_inline_failed): Declare.
        * ipa-split.c: Include ipa-inline.h
index 4b34e41..c651687 100644 (file)
@@ -114,7 +114,6 @@ extern bool ix86_expand_fp_movcc (rtx[]);
 extern bool ix86_expand_fp_vcond (rtx[]);
 extern bool ix86_expand_int_vcond (rtx[]);
 extern void ix86_expand_sse_unpack (rtx[], bool, bool);
-extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
 extern bool ix86_expand_int_addcc (rtx[]);
 extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
 extern void ix86_split_call_vzeroupper (rtx, rtx);
index 36320d8..b6d41f0 100644 (file)
@@ -19100,91 +19100,87 @@ void
 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
 {
   enum machine_mode imode = GET_MODE (operands[1]);
-  rtx (*unpack)(rtx, rtx, rtx);
-  rtx se, dest;
+  rtx tmp, dest;
 
-  switch (imode)
+  if (TARGET_SSE4_1)
     {
-    case V16QImode:
-      if (high_p)
-        unpack = gen_vec_interleave_highv16qi;
-      else
-        unpack = gen_vec_interleave_lowv16qi;
-      break;
-    case V8HImode:
-      if (high_p)
-        unpack = gen_vec_interleave_highv8hi;
-      else
-        unpack = gen_vec_interleave_lowv8hi;
-      break;
-    case V4SImode:
+      rtx (*unpack)(rtx, rtx);
+
+      switch (imode)
+       {
+       case V16QImode:
+         if (unsigned_p)
+           unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+         else
+           unpack = gen_sse4_1_sign_extendv8qiv8hi2;
+         break;
+       case V8HImode:
+         if (unsigned_p)
+           unpack = gen_sse4_1_zero_extendv4hiv4si2;
+         else
+           unpack = gen_sse4_1_sign_extendv4hiv4si2;
+         break;
+       case V4SImode:
+         if (unsigned_p)
+           unpack = gen_sse4_1_zero_extendv2siv2di2;
+         else
+           unpack = gen_sse4_1_sign_extendv2siv2di2;
+         break;
+       default:
+         gcc_unreachable ();
+       }
+
       if (high_p)
-        unpack = gen_vec_interleave_highv4si;
+       {
+         /* Shift higher 8 bytes to lower 8 bytes.  */
+         tmp = gen_reg_rtx (imode);
+         emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
+                                        gen_lowpart (V1TImode, operands[1]),
+                                        GEN_INT (64)));
+       }
       else
-        unpack = gen_vec_interleave_lowv4si;
-      break;
-    default:
-      gcc_unreachable ();
-    }
-
-  dest = gen_lowpart (imode, operands[0]);
+       tmp = operands[1];
 
-  if (unsigned_p)
-    se = force_reg (imode, CONST0_RTX (imode));
+      emit_insn (unpack (operands[0], tmp));
+    }
   else
-    se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
-                              operands[1], pc_rtx, pc_rtx);
-
-  emit_insn (unpack (dest, operands[1], se));
-}
+    {
+      rtx (*unpack)(rtx, rtx, rtx);
 
-/* This function performs the same task as ix86_expand_sse_unpack,
-   but with SSE4.1 instructions.  */
+      switch (imode)
+       {
+       case V16QImode:
+         if (high_p)
+           unpack = gen_vec_interleave_highv16qi;
+         else
+           unpack = gen_vec_interleave_lowv16qi;
+         break;
+       case V8HImode:
+         if (high_p)
+           unpack = gen_vec_interleave_highv8hi;
+         else
+           unpack = gen_vec_interleave_lowv8hi;
+         break;
+       case V4SImode:
+         if (high_p)
+           unpack = gen_vec_interleave_highv4si;
+         else
+           unpack = gen_vec_interleave_lowv4si;
+         break;
+       default:
+         gcc_unreachable ();
+       }
 
-void
-ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
-{
-  enum machine_mode imode = GET_MODE (operands[1]);
-  rtx (*unpack)(rtx, rtx);
-  rtx src, dest;
+      dest = gen_lowpart (imode, operands[0]);
 
-  switch (imode)
-    {
-    case V16QImode:
       if (unsigned_p)
-       unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+       tmp = force_reg (imode, CONST0_RTX (imode));
       else
-       unpack = gen_sse4_1_sign_extendv8qiv8hi2;
-      break;
-    case V8HImode:
-      if (unsigned_p)
-       unpack = gen_sse4_1_zero_extendv4hiv4si2;
-      else
-       unpack = gen_sse4_1_sign_extendv4hiv4si2;
-      break;
-    case V4SImode:
-      if (unsigned_p)
-       unpack = gen_sse4_1_zero_extendv2siv2di2;
-      else
-       unpack = gen_sse4_1_sign_extendv2siv2di2;
-      break;
-    default:
-      gcc_unreachable ();
-    }
+       tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+                                  operands[1], pc_rtx, pc_rtx);
 
-  dest = operands[0];
-  if (high_p)
-    {
-      /* Shift higher 8 bytes to lower 8 bytes.  */
-      src = gen_reg_rtx (imode);
-      emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
-                                    gen_lowpart (V1TImode, operands[1]),
-                                    GEN_INT (64)));
+      emit_insn (unpack (dest, operands[1], tmp));
     }
-  else
-    src = operands[1];
-
-  emit_insn (unpack (dest, src));
 }
 
 /* Expand conditional increment or decrement using adc/sbb instructions.
index 40e3972..928bf78 100644 (file)
 (define_mode_iterator VI24_128 [V8HI V4SI])
 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
 
+;; Mapping from float mode to required SSE level
+(define_mode_attr sse
+  [(SF "sse") (DF "sse2")
+   (V4SF "sse") (V2DF "sse2")
+   (V8SF "avx") (V4DF "avx")])
+
+(define_mode_attr sse2
+  [(V16QI "sse2") (V32QI "avx")
+   (V2DI "sse2") (V4DI "avx")])
+
+(define_mode_attr sse3
+  [(V16QI "sse3") (V32QI "avx")])
+
+(define_mode_attr sse4_1
+  [(V4SF "sse4_1") (V2DF "sse4_1")
+   (V8SF "avx") (V4DF "avx")])
+
+;; Pack/unpack vector modes
+(define_mode_attr sseunpackmode
+  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
+
+(define_mode_attr ssepackmode
+  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
+
+
 
 ;; Instruction suffix for sign and zero extensions.
 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
    (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
    (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
 
-;; Mapping from float mode to required SSE level
-(define_mode_attr sse
-  [(SF "sse") (DF "sse2")
-   (V4SF "sse") (V2DF "sse2")
-   (V8SF "avx") (V4DF "avx")])
-
-(define_mode_attr sse2
-  [(V16QI "sse2") (V32QI "avx")
-   (V2DI "sse2") (V4DI "avx")])
-
-(define_mode_attr sse3
-  [(V16QI "sse3") (V32QI "avx")])
-
-(define_mode_attr sse4_1
-  [(V4SF "sse4_1") (V2DF "sse4_1")
-   (V8SF "avx") (V4DF "avx")])
-
 ;; Mapping from integer vector mode to mnemonic suffix
 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
 
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_expand "vec_pack_trunc_v8hi"
-  [(match_operand:V16QI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")
-   (match_operand:V8HI 2 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  rtx op1 = gen_lowpart (V16QImode, operands[1]);
-  rtx op2 = gen_lowpart (V16QImode, operands[2]);
-  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
-  DONE;
-})
-
-(define_expand "vec_pack_trunc_v4si"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")
-   (match_operand:V4SI 2 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  rtx op1 = gen_lowpart (V8HImode, operands[1]);
-  rtx op2 = gen_lowpart (V8HImode, operands[2]);
-  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
-  DONE;
-})
-
-(define_expand "vec_pack_trunc_v2di"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V2DI 1 "register_operand" "")
-   (match_operand:V2DI 2 "register_operand" "")]
+(define_expand "vec_pack_trunc_<mode>"
+  [(match_operand:<ssepackmode> 0 "register_operand" "")
+   (match_operand:VI248_128 1 "register_operand" "")
+   (match_operand:VI248_128 2 "register_operand" "")]
   "TARGET_SSE2"
 {
-  rtx op1 = gen_lowpart (V4SImode, operands[1]);
-  rtx op2 = gen_lowpart (V4SImode, operands[2]);
+  rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
+  rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
   ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
   DONE;
 })
    (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
    (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
 
-(define_expand "vec_unpacku_hi_v16qi"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V16QI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, true);
-  else
-    ix86_expand_sse_unpack (operands, true, true);
-  DONE;
-})
-
-(define_expand "vec_unpacks_hi_v16qi"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V16QI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, true);
-  else
-    ix86_expand_sse_unpack (operands, false, true);
-  DONE;
-})
-
-(define_expand "vec_unpacku_lo_v16qi"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V16QI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, false);
-  else
-    ix86_expand_sse_unpack (operands, true, false);
-  DONE;
-})
-
-(define_expand "vec_unpacks_lo_v16qi"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V16QI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, false);
-  else
-    ix86_expand_sse_unpack (operands, false, false);
-  DONE;
-})
-
-(define_expand "vec_unpacku_hi_v8hi"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, true);
-  else
-    ix86_expand_sse_unpack (operands, true, true);
-  DONE;
-})
-
-(define_expand "vec_unpacks_hi_v8hi"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, true);
-  else
-    ix86_expand_sse_unpack (operands, false, true);
-  DONE;
-})
-
-(define_expand "vec_unpacku_lo_v8hi"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, false);
-  else
-    ix86_expand_sse_unpack (operands, true, false);
-  DONE;
-})
-
-(define_expand "vec_unpacks_lo_v8hi"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, false);
-  else
-    ix86_expand_sse_unpack (operands, false, false);
-  DONE;
-})
-
-(define_expand "vec_unpacku_hi_v4si"
-  [(match_operand:V2DI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")]
+(define_expand "vec_unpacks_lo_<mode>"
+  [(match_operand:<sseunpackmode> 0 "register_operand" "")
+   (match_operand:VI124_128 1 "register_operand" "")]
   "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, true);
-  else
-    ix86_expand_sse_unpack (operands, true, true);
-  DONE;
-})
+  "ix86_expand_sse_unpack (operands, false, false); DONE;")
 
-(define_expand "vec_unpacks_hi_v4si"
-  [(match_operand:V2DI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")]
+(define_expand "vec_unpacks_hi_<mode>"
+  [(match_operand:<sseunpackmode> 0 "register_operand" "")
+   (match_operand:VI124_128 1 "register_operand" "")]
   "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, true);
-  else
-    ix86_expand_sse_unpack (operands, false, true);
-  DONE;
-})
+  "ix86_expand_sse_unpack (operands, false, true); DONE;")
 
-(define_expand "vec_unpacku_lo_v4si"
-  [(match_operand:V2DI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")]
+(define_expand "vec_unpacku_lo_<mode>"
+  [(match_operand:<sseunpackmode> 0 "register_operand" "")
+   (match_operand:VI124_128 1 "register_operand" "")]
   "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, false);
-  else
-    ix86_expand_sse_unpack (operands, true, false);
-  DONE;
-})
+  "ix86_expand_sse_unpack (operands, true, false); DONE;")
 
-(define_expand "vec_unpacks_lo_v4si"
-  [(match_operand:V2DI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")]
+(define_expand "vec_unpacku_hi_<mode>"
+  [(match_operand:<sseunpackmode> 0 "register_operand" "")
+   (match_operand:VI124_128 1 "register_operand" "")]
   "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, false);
-  else
-    ix86_expand_sse_unpack (operands, false, false);
-  DONE;
-})
+  "ix86_expand_sse_unpack (operands, true, true); DONE;")
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
-(define_insn_and_split "vec_dup<mode>"
+(define_insn "vec_dup<mode>"
   [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
        (vec_duplicate:AVX256MODE24P
          (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
   "@
    vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
    #"
-  "&& reload_completed && REG_P (operands[1])"
-  [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
-   (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
-  "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
   [(set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
+(define_split
+  [(set (match_operand:AVX256MODE24P 0 "register_operand" "")
+       (vec_duplicate:AVX256MODE24P
+         (match_operand:<avxscalarmode> 1 "register_operand" "")))]
+  "TARGET_AVX && reload_completed"
+  [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
+   (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
+  "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));")
+
 (define_insn "avx_vbroadcastf128_<mode>"
   [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
        (vec_concat:AVX256MODE