From b6fc71686efe34d2e213ead9b86aa6bd973625d4 Mon Sep 17 00:00:00 2001 From: uros Date: Sat, 16 Apr 2011 12:56:44 +0000 Subject: [PATCH] * config/i386/sse.md (sseunpackmode): New mode attribute. (ssepackmode): Ditto. (vec_pack_trunc_): Macroize expander from vec_pack_trunc_{v8hi,v4si,v2di} using VI248_128 mode iterator. (vec_unpacks_lo_): Macroize expander from vec_unpacks_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator. (vec_unpacks_hi_): Macroize expander from vec_unpacks_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator. (vec_unpacku_lo_): Macroize expander from vec_unpacku_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator. (vec_unpacku_hi_): Macroize expander from vec_unpacku_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator. * config/i386/i386.c (ix86_expand_sse_unpack): Merge with ix86_expand_sse4_unpack. * config/i386/i386-protos.h (ix86_expand_sse4_unpack): Remove. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@172585 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 36 +++++-- gcc/config/i386/i386-protos.h | 1 - gcc/config/i386/i386.c | 140 ++++++++++++------------ gcc/config/i386/sse.md | 245 ++++++++++-------------------------------- 4 files changed, 153 insertions(+), 269 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 237f2fd..da5797e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2011-04-16 Uros Bizjak + + * config/i386/sse.md (sseunpackmode): New mode attribute. + (ssepackmode): Ditto. + (vec_pack_trunc_): Macroize expander from + vec_pack_trunc_{v8hi,v4si,v2di} using VI248_128 mode iterator. + (vec_unpacks_lo_): Macroize expander from + vec_unpacks_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator. + (vec_unpacks_hi_): Macroize expander from + vec_unpacks_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator. + (vec_unpacku_lo_): Macroize expander from + vec_unpacku_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator. 
+ (vec_unpacku_hi_): Macroize expander from + vec_unpacku_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator. + * config/i386/i386.c (ix86_expand_sse_unpack): Merge with + ix86_expand_sse4_unpack. + * config/i386/i386-protos.h (ix86_expand_sse4_unpack): Remove. + 2011-04-16 Jan Hubicka * cgraphbuild.c: Include ipa-inline.h. @@ -10,7 +28,8 @@ and disregard_inline_limits flags. (cgrpah_global_info): Remove estimated_stack_size, stack_frame_offset, time, size, estimated_growth. - * ipa-cp.c (ipcp_versionable_function_p, ipcp_generate_summary): Update. + * ipa-cp.c (ipcp_versionable_function_p, ipcp_generate_summary): + Update. * cgraphunit.c (cgraph_decide_is_function_needed): Use DECL_DISREGARD_INLINE_LIMITS. (cgraph_analyze_function): Do not initialize @@ -18,15 +37,16 @@ * lto-cgraph.c (lto_output_node, input_overwrite_node): Do not stream inlinable, versionable and disregard_inline_limits. * ipa-inline.c (cgraph_clone_inlined_nodes, cgraph_mark_inline_edge, - cgraph_check_inline_limits, cgraph_default_inline_p, cgraph_edge_badness, - update_caller_keys, update_callee_keys, add_new_edges_to_heap): Update. - (cgraph_decide_inlining_of_small_function): Update; set CIF_FUNCTION_NOT_INLINABLE - for uninlinable functions. + cgraph_check_inline_limits, cgraph_default_inline_p, + cgraph_edge_badness, update_caller_keys, update_callee_keys, + add_new_edges_to_heap): Update. + (cgraph_decide_inlining_of_small_function): Update; set + CIF_FUNCTION_NOT_INLINABLE for uninlinable functions. (cgraph_decide_inlining, cgraph_edge_early_inlinable_p, cgraph_decide_inlining_incrementally): Update. - * ipa-inline.h (inline_summary): Add inlinable, versionable, disregard_inline_limits, - estimated_stack_size, stack_frame_offset, time, size and estimated_growth - parameters. + * ipa-inline.h (inline_summary): Add inlinable, versionable, + disregard_inline_limits, estimated_stack_size, stack_frame_offset, + time, size and estimated_growth parameters. (estimate_edge_growth): Update. 
(initialize_inline_failed): Declare. * ipa-split.c: Include ipa-inline.h diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 4b34e41..c651687 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -114,7 +114,6 @@ extern bool ix86_expand_fp_movcc (rtx[]); extern bool ix86_expand_fp_vcond (rtx[]); extern bool ix86_expand_int_vcond (rtx[]); extern void ix86_expand_sse_unpack (rtx[], bool, bool); -extern void ix86_expand_sse4_unpack (rtx[], bool, bool); extern bool ix86_expand_int_addcc (rtx[]); extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); extern void ix86_split_call_vzeroupper (rtx, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 36320d8..b6d41f0 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19100,91 +19100,87 @@ void ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p) { enum machine_mode imode = GET_MODE (operands[1]); - rtx (*unpack)(rtx, rtx, rtx); - rtx se, dest; + rtx tmp, dest; - switch (imode) + if (TARGET_SSE4_1) { - case V16QImode: - if (high_p) - unpack = gen_vec_interleave_highv16qi; - else - unpack = gen_vec_interleave_lowv16qi; - break; - case V8HImode: - if (high_p) - unpack = gen_vec_interleave_highv8hi; - else - unpack = gen_vec_interleave_lowv8hi; - break; - case V4SImode: + rtx (*unpack)(rtx, rtx); + + switch (imode) + { + case V16QImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv8qiv8hi2; + else + unpack = gen_sse4_1_sign_extendv8qiv8hi2; + break; + case V8HImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv4hiv4si2; + else + unpack = gen_sse4_1_sign_extendv4hiv4si2; + break; + case V4SImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv2siv2di2; + else + unpack = gen_sse4_1_sign_extendv2siv2di2; + break; + default: + gcc_unreachable (); + } + if (high_p) - unpack = gen_vec_interleave_highv4si; + { + /* Shift higher 8 bytes to lower 8 bytes. 
*/ + tmp = gen_reg_rtx (imode); + emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp), + gen_lowpart (V1TImode, operands[1]), + GEN_INT (64))); + } else - unpack = gen_vec_interleave_lowv4si; - break; - default: - gcc_unreachable (); - } - - dest = gen_lowpart (imode, operands[0]); + tmp = operands[1]; - if (unsigned_p) - se = force_reg (imode, CONST0_RTX (imode)); + emit_insn (unpack (operands[0], tmp)); + } else - se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), - operands[1], pc_rtx, pc_rtx); - - emit_insn (unpack (dest, operands[1], se)); -} + { + rtx (*unpack)(rtx, rtx, rtx); -/* This function performs the same task as ix86_expand_sse_unpack, - but with SSE4.1 instructions. */ + switch (imode) + { + case V16QImode: + if (high_p) + unpack = gen_vec_interleave_highv16qi; + else + unpack = gen_vec_interleave_lowv16qi; + break; + case V8HImode: + if (high_p) + unpack = gen_vec_interleave_highv8hi; + else + unpack = gen_vec_interleave_lowv8hi; + break; + case V4SImode: + if (high_p) + unpack = gen_vec_interleave_highv4si; + else + unpack = gen_vec_interleave_lowv4si; + break; + default: + gcc_unreachable (); + } -void -ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) -{ - enum machine_mode imode = GET_MODE (operands[1]); - rtx (*unpack)(rtx, rtx); - rtx src, dest; + dest = gen_lowpart (imode, operands[0]); - switch (imode) - { - case V16QImode: if (unsigned_p) - unpack = gen_sse4_1_zero_extendv8qiv8hi2; + tmp = force_reg (imode, CONST0_RTX (imode)); else - unpack = gen_sse4_1_sign_extendv8qiv8hi2; - break; - case V8HImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv4hiv4si2; - else - unpack = gen_sse4_1_sign_extendv4hiv4si2; - break; - case V4SImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv2siv2di2; - else - unpack = gen_sse4_1_sign_extendv2siv2di2; - break; - default: - gcc_unreachable (); - } + tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), + operands[1], pc_rtx, 
pc_rtx); - dest = operands[0]; - if (high_p) - { - /* Shift higher 8 bytes to lower 8 bytes. */ - src = gen_reg_rtx (imode); - emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src), - gen_lowpart (V1TImode, operands[1]), - GEN_INT (64))); + emit_insn (unpack (dest, operands[1], tmp)); } - else - src = operands[1]; - - emit_insn (unpack (dest, src)); } /* Expand conditional increment or decrement using adb/sbb instructions. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 40e3972..928bf78 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -70,6 +70,31 @@ (define_mode_iterator VI24_128 [V8HI V4SI]) (define_mode_iterator VI248_128 [V8HI V4SI V2DI]) +;; Mapping from float mode to required SSE level +(define_mode_attr sse + [(SF "sse") (DF "sse2") + (V4SF "sse") (V2DF "sse2") + (V8SF "avx") (V4DF "avx")]) + +(define_mode_attr sse2 + [(V16QI "sse2") (V32QI "avx") + (V2DI "sse2") (V4DI "avx")]) + +(define_mode_attr sse3 + [(V16QI "sse3") (V32QI "avx")]) + +(define_mode_attr sse4_1 + [(V4SF "sse4_1") (V2DF "sse4_1") + (V8SF "avx") (V4DF "avx")]) + +;; Pack/unpack vector modes +(define_mode_attr sseunpackmode + [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) + +(define_mode_attr ssepackmode + [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")]) + + ;; Instruction suffix for sign and zero extensions. 
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) @@ -126,23 +151,6 @@ (V2DF "TARGET_SSE") (V4SF "TARGET_SSE") (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")]) -;; Mapping from float mode to required SSE level -(define_mode_attr sse - [(SF "sse") (DF "sse2") - (V4SF "sse") (V2DF "sse2") - (V8SF "avx") (V4DF "avx")]) - -(define_mode_attr sse2 - [(V16QI "sse2") (V32QI "avx") - (V2DI "sse2") (V4DI "avx")]) - -(define_mode_attr sse3 - [(V16QI "sse3") (V32QI "avx")]) - -(define_mode_attr sse4_1 - [(V4SF "sse4_1") (V2DF "sse4_1") - (V8SF "avx") (V4DF "avx")]) - ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) @@ -5856,38 +5864,14 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_expand "vec_pack_trunc_v8hi" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] - "TARGET_SSE2" -{ - rtx op1 = gen_lowpart (V16QImode, operands[1]); - rtx op2 = gen_lowpart (V16QImode, operands[2]); - ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); - DONE; -}) - -(define_expand "vec_pack_trunc_v4si" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "register_operand" "")] - "TARGET_SSE2" -{ - rtx op1 = gen_lowpart (V8HImode, operands[1]); - rtx op2 = gen_lowpart (V8HImode, operands[2]); - ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); - DONE; -}) - -(define_expand "vec_pack_trunc_v2di" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "register_operand" "")] +(define_expand "vec_pack_trunc_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI248_128 1 "register_operand" "") + (match_operand:VI248_128 2 "register_operand" "")] "TARGET_SSE2" { - rtx op1 = gen_lowpart (V4SImode, operands[1]); - rtx op2 = 
gen_lowpart (V4SImode, operands[2]); + rtx op1 = gen_lowpart (mode, operands[1]); + rtx op2 = gen_lowpart (mode, operands[2]); ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); DONE; }) @@ -6767,149 +6751,29 @@ (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex") (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")]) -(define_expand "vec_unpacku_hi_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, true); - else - ix86_expand_sse_unpack (operands, true, true); - DONE; -}) - -(define_expand "vec_unpacks_hi_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, true); - else - ix86_expand_sse_unpack (operands, false, true); - DONE; -}) - -(define_expand "vec_unpacku_lo_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, false); - else - ix86_expand_sse_unpack (operands, true, false); - DONE; -}) - -(define_expand "vec_unpacks_lo_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, false); - else - ix86_expand_sse_unpack (operands, false, false); - DONE; -}) - -(define_expand "vec_unpacku_hi_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, true); - else - ix86_expand_sse_unpack (operands, true, true); - DONE; -}) - -(define_expand "vec_unpacks_hi_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - 
ix86_expand_sse4_unpack (operands, false, true); - else - ix86_expand_sse_unpack (operands, false, true); - DONE; -}) - -(define_expand "vec_unpacku_lo_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, false); - else - ix86_expand_sse_unpack (operands, true, false); - DONE; -}) - -(define_expand "vec_unpacks_lo_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, false); - else - ix86_expand_sse_unpack (operands, false, false); - DONE; -}) - -(define_expand "vec_unpacku_hi_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] +(define_expand "vec_unpacks_lo_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, true); - else - ix86_expand_sse_unpack (operands, true, true); - DONE; -}) + "ix86_expand_sse_unpack (operands, false, false); DONE;") -(define_expand "vec_unpacks_hi_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] +(define_expand "vec_unpacks_hi_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, true); - else - ix86_expand_sse_unpack (operands, false, true); - DONE; -}) + "ix86_expand_sse_unpack (operands, false, true); DONE;") -(define_expand "vec_unpacku_lo_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] +(define_expand "vec_unpacku_lo_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - 
ix86_expand_sse4_unpack (operands, true, false); - else - ix86_expand_sse_unpack (operands, true, false); - DONE; -}) + "ix86_expand_sse_unpack (operands, true, false); DONE;") -(define_expand "vec_unpacks_lo_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] +(define_expand "vec_unpacku_hi_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, false); - else - ix86_expand_sse_unpack (operands, false, false); - DONE; -}) + "ix86_expand_sse_unpack (operands, true, true); DONE;") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -10062,7 +9926,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn_and_split "vec_dup" +(define_insn "vec_dup" [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x") (vec_duplicate:AVX256MODE24P (match_operand: 1 "nonimmediate_operand" "m,?x")))] @@ -10070,15 +9934,20 @@ "@ vbroadcast\t{%1, %0|%0, %1} #" - "&& reload_completed && REG_P (operands[1])" - [(set (match_dup 2) (vec_duplicate: (match_dup 1))) - (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))] - "operands[2] = gen_rtx_REG (mode, REGNO (operands[0]));" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) +(define_split + [(set (match_operand:AVX256MODE24P 0 "register_operand" "") + (vec_duplicate:AVX256MODE24P + (match_operand: 1 "register_operand" "")))] + "TARGET_AVX && reload_completed" + [(set (match_dup 2) (vec_duplicate: (match_dup 1))) + (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))] + "operands[2] = gen_rtx_REG (mode, REGNO (operands[0]));") + (define_insn "avx_vbroadcastf128_" [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x") (vec_concat:AVX256MODE -- 2.7.4