From ff97910daf84acf7f3073b6b53660efcad9e6480 Mon Sep 17 00:00:00 2001 From: Vladimir Yakovlev Date: Tue, 6 Nov 2012 10:29:23 +0000 Subject: [PATCH] i386-protos.h (emit_i387_cw_initialization): Deleted. * config/i386/i386-protos.h (emit_i387_cw_initialization): Deleted. (emit_vzero): Added prototype. (ix86_mode_entry): Likewise. (ix86_mode_exit): Likewise. (ix86_emit_mode_set): Likewise. * config/i386/i386.c (typedef struct block_info_def): Deleted. (define BLOCK_INFO): Deleted. (check_avx256_stores): Added checking for MEM_P. (move_or_delete_vzeroupper_2): Deleted. (move_or_delete_vzeroupper_1): Deleted. (move_or_delete_vzeroupper): Deleted. (ix86_maybe_emit_epilogue_vzeroupper): Deleted. (function_pass_avx256_p): Deleted. (ix86_function_ok_for_sibcall): Deleted disabling sibcall. (nit_cumulative_args): Deleted initialization of of avx256 fields of cfun->machine. (ix86_emit_restore_sse_regs_using_mov): Deleted vzeroupper generation. (ix86_expand_epilogue): Likewise. (ix86_avx_u128_mode_needed): New. (ix86_i387_mode_needed): Renamed ix86_mode_needed. (ix86_mode_needed): New. (ix86_avx_u128_mode_after): New. (ix86_mode_after): New. (ix86_avx_u128_mode_entry): New. (ix86_mode_entry): New. (ix86_avx_u128_mode_exit): New. (ix86_mode_exit): New. (ix86_emit_mode_set): New. (ix86_expand_call): Deleted vzeroupper generation. (ix86_split_call_vzeroupper): Deleted. (ix86_init_machine_status): Initialzed optimize_mode_switching. (ix86_expand_special_args_builtin): Changed. (ix86_reorg): Deleted a call of move_or_delete_vzeroupper. * config/i386/i386.h (VALID_AVX256_REG_OR_OI_MODE): New. (AVX_U128): New. (avx_u128_state): New. (NUM_MODES_FOR_MODE_SWITCHING): Added AVX_U128_ANY. (MODE_AFTER): New. (MODE_ENTRY): New. (MODE_EXIT): New. (EMIT_MODE_SET): Changed. (machine_function): Deleted avx256 fields. * config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): Deleted. (define_insn_and_split "*call_vzeroupper"): Deleted. (define_insn_and_split "*call_rex64_ms_sysv_vzeroupper"): Deleted. (define_insn_and_split "*sibcall_vzeroupper"): Deleted. (define_insn_and_split "*call_pop_vzeroupper"): Deleted. (define_insn_and_split "*sibcall_pop_vzeroupper"): Deleted. (define_insn_and_split "*call_value_vzeroupper"): Deleted. (define_insn_and_split "*sibcall_value_vzeroupper"): Deleted. (define_insn_and_split "*call_value_rex64_ms_sysv_vzeroupper"): Deleted. (define_insn_and_split "*call_value_pop_vzeroupper"): Deleted. (define_insn_and_split "*sibcall_value_pop_vzeroupper"): Deleted. (define_expand "return"): Deleted vzeroupper emitting. (define_expand "simple_return"): Deleted. * config/i386/predicates.md (vzeroupper_operation): New. * config/i386/sse.md (avx_vzeroupper): Changed. testsuite/ChangeLog: * gcc.target/i386/avx-vzeroupper-5.c: Changed scan-assembler-times. * gcc.target/i386/avx-vzeroupper-8.c: Likewise. * gcc.target/i386/avx-vzeroupper-9.c: Likewise. * gcc.target/i386/avx-vzeroupper-10.c: Likewise. * gcc.target/i386/avx-vzeroupper-11.c: Likewise. * gcc.target/i386/avx-vzeroupper-12.c: Likewise. * gcc.target/i386/avx-vzeroupper-19.c: Likewis. * gcc.target/i386/avx-vzeroupper-27.c: New. From-SVN: r193229 --- gcc/ChangeLog | 65 ++ gcc/config/i386/i386-protos.h | 7 +- gcc/config/i386/i386.c | 774 +++++++--------------- gcc/config/i386/i386.h | 57 +- gcc/config/i386/i386.md | 166 ----- gcc/config/i386/predicates.md | 7 + gcc/config/i386/sse.md | 3 +- gcc/testsuite/ChangeLog | 11 + gcc/testsuite/gcc.target/i386/avx-vzeroupper-10.c | 2 +- gcc/testsuite/gcc.target/i386/avx-vzeroupper-11.c | 2 +- gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c | 2 +- gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c | 2 +- gcc/testsuite/gcc.target/i386/avx-vzeroupper-27.c | 26 + gcc/testsuite/gcc.target/i386/avx-vzeroupper-5.c | 2 +- gcc/testsuite/gcc.target/i386/avx-vzeroupper-8.c | 2 +- gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c | 2 +- 16 files changed, 382 insertions(+), 748 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-27.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5f9bb44..255ce39 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,68 @@ +2012-11-06 Vladimir Yakovlev + + * config/i386/i386-protos.h (emit_i387_cw_initialization): Deleted. + (emit_vzero): Added prototype. + (ix86_mode_entry): Likewise. + (ix86_mode_exit): Likewise. + (ix86_emit_mode_set): Likewise. + + * config/i386/i386.c (typedef struct block_info_def): Deleted. + (define BLOCK_INFO): Deleted. + (check_avx256_stores): Added checking for MEM_P. + (move_or_delete_vzeroupper_2): Deleted. + (move_or_delete_vzeroupper_1): Deleted. + (move_or_delete_vzeroupper): Deleted. + (ix86_maybe_emit_epilogue_vzeroupper): Deleted. + (function_pass_avx256_p): Deleted. + (ix86_function_ok_for_sibcall): Deleted disabling sibcall. + (nit_cumulative_args): Deleted initialization of of avx256 fields of + cfun->machine. + (ix86_emit_restore_sse_regs_using_mov): Deleted vzeroupper generation. + (ix86_expand_epilogue): Likewise. + (ix86_avx_u128_mode_needed): New. + (ix86_i387_mode_needed): Renamed ix86_mode_needed. + (ix86_mode_needed): New. + (ix86_avx_u128_mode_after): New. + (ix86_mode_after): New. + (ix86_avx_u128_mode_entry): New. + (ix86_mode_entry): New. + (ix86_avx_u128_mode_exit): New. + (ix86_mode_exit): New. + (ix86_emit_mode_set): New. + (ix86_expand_call): Deleted vzeroupper generation. + (ix86_split_call_vzeroupper): Deleted. + (ix86_init_machine_status): Initialzed optimize_mode_switching. + (ix86_expand_special_args_builtin): Changed. + (ix86_reorg): Deleted a call of move_or_delete_vzeroupper. + + * config/i386/i386.h (VALID_AVX256_REG_OR_OI_MODE): New. + (AVX_U128): New. + (avx_u128_state): New. + (NUM_MODES_FOR_MODE_SWITCHING): Added AVX_U128_ANY. + (MODE_AFTER): New. + (MODE_ENTRY): New. + (MODE_EXIT): New. + (EMIT_MODE_SET): Changed. + (machine_function): Deleted avx256 fields. + + * config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): Deleted. + (define_insn_and_split "*call_vzeroupper"): Deleted. + (define_insn_and_split "*call_rex64_ms_sysv_vzeroupper"): Deleted. + (define_insn_and_split "*sibcall_vzeroupper"): Deleted. + (define_insn_and_split "*call_pop_vzeroupper"): Deleted. + (define_insn_and_split "*sibcall_pop_vzeroupper"): Deleted. + (define_insn_and_split "*call_value_vzeroupper"): Deleted. + (define_insn_and_split "*sibcall_value_vzeroupper"): Deleted. + (define_insn_and_split "*call_value_rex64_ms_sysv_vzeroupper"): Deleted. + (define_insn_and_split "*call_value_pop_vzeroupper"): Deleted. + (define_insn_and_split "*sibcall_value_pop_vzeroupper"): Deleted. + (define_expand "return"): Deleted vzeroupper emitting. + (define_expand "simple_return"): Deleted. + + * config/i386/predicates.md (vzeroupper_operation): New. + + * config/i386/sse.md (avx_vzeroupper): Changed. + 2012-11-06 Uros Bizjak Kaz Kojima diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 96971ae..0d643b1 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -167,8 +167,13 @@ extern bool ix86_secondary_memory_needed (enum reg_class, enum reg_class, enum machine_mode, int); extern bool ix86_cannot_change_mode_class (enum machine_mode, enum machine_mode, enum reg_class); + extern int ix86_mode_needed (int, rtx); -extern void emit_i387_cw_initialization (int); +extern int ix86_mode_after (int, int, rtx); +extern int ix86_mode_entry (int); +extern int ix86_mode_exit (int); +extern void ix86_emit_mode_set (int, int); + extern void x86_order_regs_for_local_alloc (void); extern void x86_function_profiler (FILE *, int); extern void x86_emit_floatuns (rtx [2]); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a8b0962..d747a5a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -72,48 +72,16 @@ enum upper_128bits_state used }; -typedef struct block_info_def -{ - /* State of the upper 128bits of AVX registers at exit. */ - enum upper_128bits_state state; - /* TRUE if state of the upper 128bits of AVX registers is unchanged - in this block. */ - bool unchanged; - /* TRUE if block has been processed. */ - bool processed; - /* TRUE if block has been scanned. */ - bool scanned; - /* Previous state of the upper 128bits of AVX registers at entry. */ - enum upper_128bits_state prev; -} *block_info; - -#define BLOCK_INFO(B) ((block_info) (B)->aux) - -enum call_avx256_state -{ - /* Callee returns 256bit AVX register. */ - callee_return_avx256 = -1, - /* Callee returns and passes 256bit AVX register. */ - callee_return_pass_avx256, - /* Callee passes 256bit AVX register. */ - callee_pass_avx256, - /* Callee doesn't return nor passe 256bit AVX register, or no - 256bit AVX register in function return. */ - call_no_avx256, - /* vzeroupper intrinsic. */ - vzeroupper_intrinsic -}; - /* Check if a 256bit AVX register is referenced in stores. */ static void check_avx256_stores (rtx dest, const_rtx set, void *data) { - if ((REG_P (dest) - && VALID_AVX256_REG_MODE (GET_MODE (dest))) + if (((REG_P (dest) || MEM_P(dest)) + && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (dest))) || (GET_CODE (set) == SET - && REG_P (SET_SRC (set)) - && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set))))) + && (REG_P (SET_SRC (set)) || MEM_P (SET_SRC (set))) + && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (SET_SRC (set))))) { enum upper_128bits_state *state = (enum upper_128bits_state *) data; @@ -121,377 +89,6 @@ check_avx256_stores (rtx dest, const_rtx set, void *data) } } -/* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper - in basic block BB. Delete it if upper 128bit AVX registers are - unused. If it isn't deleted, move it to just before a jump insn. - - STATE is state of the upper 128bits of AVX registers at entry. */ - -static void -move_or_delete_vzeroupper_2 (basic_block bb, - enum upper_128bits_state state) -{ - rtx insn, bb_end; - rtx vzeroupper_insn = NULL_RTX; - rtx pat; - int avx256; - bool unchanged; - - if (BLOCK_INFO (bb)->unchanged) - { - if (dump_file) - fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n", - bb->index, state); - - BLOCK_INFO (bb)->state = state; - return; - } - - if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state) - { - if (dump_file) - fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n", - bb->index, BLOCK_INFO (bb)->state); - return; - } - - BLOCK_INFO (bb)->prev = state; - - if (dump_file) - fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n", - bb->index, state); - - unchanged = true; - - /* BB_END changes when it is deleted. */ - bb_end = BB_END (bb); - insn = BB_HEAD (bb); - while (insn != bb_end) - { - insn = NEXT_INSN (insn); - - if (!NONDEBUG_INSN_P (insn)) - continue; - - /* Move vzeroupper before jump/call. */ - if (JUMP_P (insn) || CALL_P (insn)) - { - if (!vzeroupper_insn) - continue; - - if (PREV_INSN (insn) != vzeroupper_insn) - { - if (dump_file) - { - fprintf (dump_file, "Move vzeroupper after:\n"); - print_rtl_single (dump_file, PREV_INSN (insn)); - fprintf (dump_file, "before:\n"); - print_rtl_single (dump_file, insn); - } - reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn, - PREV_INSN (insn)); - } - vzeroupper_insn = NULL_RTX; - continue; - } - - pat = PATTERN (insn); - - /* Check insn for vzeroupper intrinsic. */ - if (GET_CODE (pat) == UNSPEC_VOLATILE - && XINT (pat, 1) == UNSPECV_VZEROUPPER) - { - if (dump_file) - { - /* Found vzeroupper intrinsic. */ - fprintf (dump_file, "Found vzeroupper:\n"); - print_rtl_single (dump_file, insn); - } - } - else - { - /* Check insn for vzeroall intrinsic. */ - if (GET_CODE (pat) == PARALLEL - && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE - && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL) - { - state = unused; - unchanged = false; - - /* Delete pending vzeroupper insertion. */ - if (vzeroupper_insn) - { - delete_insn (vzeroupper_insn); - vzeroupper_insn = NULL_RTX; - } - } - else if (state != used) - { - note_stores (pat, check_avx256_stores, &state); - if (state == used) - unchanged = false; - } - continue; - } - - /* Process vzeroupper intrinsic. */ - avx256 = INTVAL (XVECEXP (pat, 0, 0)); - - if (state == unused) - { - /* Since the upper 128bits are cleared, callee must not pass - 256bit AVX register. We only need to check if callee - returns 256bit AVX register. */ - if (avx256 == callee_return_avx256) - { - state = used; - unchanged = false; - } - - /* Remove unnecessary vzeroupper since upper 128bits are - cleared. */ - if (dump_file) - { - fprintf (dump_file, "Delete redundant vzeroupper:\n"); - print_rtl_single (dump_file, insn); - } - delete_insn (insn); - } - else - { - /* Set state to UNUSED if callee doesn't return 256bit AVX - register. */ - if (avx256 != callee_return_pass_avx256) - state = unused; - - if (avx256 == callee_return_pass_avx256 - || avx256 == callee_pass_avx256) - { - /* Must remove vzeroupper since callee passes in 256bit - AVX register. */ - if (dump_file) - { - fprintf (dump_file, "Delete callee pass vzeroupper:\n"); - print_rtl_single (dump_file, insn); - } - delete_insn (insn); - } - else - { - vzeroupper_insn = insn; - unchanged = false; - } - } - } - - BLOCK_INFO (bb)->state = state; - BLOCK_INFO (bb)->unchanged = unchanged; - BLOCK_INFO (bb)->scanned = true; - - if (dump_file) - fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n", - bb->index, unchanged ? "unchanged" : "changed", - state); -} - -/* Helper function for move_or_delete_vzeroupper. Process vzeroupper - in BLOCK and check its predecessor blocks. Treat UNKNOWN state - as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit - state is changed. */ - -static bool -move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused) -{ - edge e; - edge_iterator ei; - enum upper_128bits_state state, old_state, new_state; - bool seen_unknown; - - if (dump_file) - fprintf (dump_file, " Process [bb %i]: status: %d\n", - block->index, BLOCK_INFO (block)->processed); - - if (BLOCK_INFO (block)->processed) - return false; - - state = unused; - - /* Check all predecessor edges of this block. */ - seen_unknown = false; - FOR_EACH_EDGE (e, ei, block->preds) - { - if (e->src == block) - continue; - switch (BLOCK_INFO (e->src)->state) - { - case unknown: - if (!unknown_is_unused) - seen_unknown = true; - case unused: - break; - case used: - state = used; - goto done; - } - } - - if (seen_unknown) - state = unknown; - -done: - old_state = BLOCK_INFO (block)->state; - move_or_delete_vzeroupper_2 (block, state); - new_state = BLOCK_INFO (block)->state; - - if (state != unknown || new_state == used) - BLOCK_INFO (block)->processed = true; - - /* Need to rescan if the upper 128bits of AVX registers are changed - to USED at exit. */ - if (new_state != old_state) - { - if (new_state == used) - cfun->machine->rescan_vzeroupper_p = 1; - return true; - } - else - return false; -} - -/* Go through the instruction stream looking for vzeroupper. Delete - it if upper 128bit AVX registers are unused. If it isn't deleted, - move it to just before a jump insn. */ - -static void -move_or_delete_vzeroupper (void) -{ - edge e; - edge_iterator ei; - basic_block bb; - fibheap_t worklist, pending, fibheap_swap; - sbitmap visited, in_worklist, in_pending, sbitmap_swap; - int *bb_order; - int *rc_order; - int i; - - /* Set up block info for each basic block. */ - alloc_aux_for_blocks (sizeof (struct block_info_def)); - - /* Process outgoing edges of entry point. */ - if (dump_file) - fprintf (dump_file, "Process outgoing edges of entry point\n"); - - FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs) - { - move_or_delete_vzeroupper_2 (e->dest, - cfun->machine->caller_pass_avx256_p - ? used : unused); - BLOCK_INFO (e->dest)->processed = true; - } - - /* Compute reverse completion order of depth first search of the CFG - so that the data-flow runs faster. */ - rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS); - bb_order = XNEWVEC (int, last_basic_block); - pre_and_rev_post_order_compute (NULL, rc_order, false); - for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++) - bb_order[rc_order[i]] = i; - free (rc_order); - - worklist = fibheap_new (); - pending = fibheap_new (); - visited = sbitmap_alloc (last_basic_block); - in_worklist = sbitmap_alloc (last_basic_block); - in_pending = sbitmap_alloc (last_basic_block); - bitmap_clear (in_worklist); - - /* Don't check outgoing edges of entry point. */ - bitmap_ones (in_pending); - FOR_EACH_BB (bb) - if (BLOCK_INFO (bb)->processed) - bitmap_clear_bit (in_pending, bb->index); - else - { - move_or_delete_vzeroupper_1 (bb, false); - fibheap_insert (pending, bb_order[bb->index], bb); - } - - if (dump_file) - fprintf (dump_file, "Check remaining basic blocks\n"); - - while (!fibheap_empty (pending)) - { - fibheap_swap = pending; - pending = worklist; - worklist = fibheap_swap; - sbitmap_swap = in_pending; - in_pending = in_worklist; - in_worklist = sbitmap_swap; - - bitmap_clear (visited); - - cfun->machine->rescan_vzeroupper_p = 0; - - while (!fibheap_empty (worklist)) - { - bb = (basic_block) fibheap_extract_min (worklist); - bitmap_clear_bit (in_worklist, bb->index); - gcc_assert (!bitmap_bit_p (visited, bb->index)); - if (!bitmap_bit_p (visited, bb->index)) - { - edge_iterator ei; - - bitmap_set_bit (visited, bb->index); - - if (move_or_delete_vzeroupper_1 (bb, false)) - FOR_EACH_EDGE (e, ei, bb->succs) - { - if (e->dest == EXIT_BLOCK_PTR - || BLOCK_INFO (e->dest)->processed) - continue; - - if (bitmap_bit_p (visited, e->dest->index)) - { - if (!bitmap_bit_p (in_pending, e->dest->index)) - { - /* Send E->DEST to next round. */ - bitmap_set_bit (in_pending, e->dest->index); - fibheap_insert (pending, - bb_order[e->dest->index], - e->dest); - } - } - else if (!bitmap_bit_p (in_worklist, e->dest->index)) - { - /* Add E->DEST to current round. */ - bitmap_set_bit (in_worklist, e->dest->index); - fibheap_insert (worklist, bb_order[e->dest->index], - e->dest); - } - } - } - } - - if (!cfun->machine->rescan_vzeroupper_p) - break; - } - - free (bb_order); - fibheap_delete (worklist); - fibheap_delete (pending); - sbitmap_free (visited); - sbitmap_free (in_worklist); - sbitmap_free (in_pending); - - if (dump_file) - fprintf (dump_file, "Process remaining basic blocks\n"); - - FOR_EACH_BB (bb) - move_or_delete_vzeroupper_1 (bb, true); - - free_aux_for_blocks (); -} - static rtx legitimize_dllimport_symbol (rtx, bool); #ifndef CHECK_STACK_LIMIT @@ -4125,37 +3722,6 @@ ix86_option_override_internal (bool main_args_p) = build_target_option_node (); } -/* Return TRUE if VAL is passed in register with 256bit AVX modes. */ - -static bool -function_pass_avx256_p (const_rtx val) -{ - if (!val) - return false; - - if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val))) - return true; - - if (GET_CODE (val) == PARALLEL) - { - int i; - rtx r; - - for (i = XVECLEN (val, 0) - 1; i >= 0; i--) - { - r = XVECEXP (val, 0, i); - if (GET_CODE (r) == EXPR_LIST - && XEXP (r, 0) - && REG_P (XEXP (r, 0)) - && (GET_MODE (XEXP (r, 0)) == OImode - || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0))))) - return true; - } - } - - return false; -} - /* Implement the TARGET_OPTION_OVERRIDE hook. */ static void @@ -5078,15 +4644,6 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) if (!rtx_equal_p (a, b)) return false; } - else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) - { - /* Disable sibcall if we need to generate vzeroupper after - callee returns. */ - if (TARGET_VZEROUPPER - && cfun->machine->callee_return_avx256_p - && !cfun->machine->caller_return_avx256_p) - return false; - } else if (!rtx_equal_p (a, b)) return false; @@ -5866,45 +5423,18 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ int caller) { struct cgraph_local_info *i; - tree fnret_type; memset (cum, 0, sizeof (*cum)); - /* Initialize for the current callee. */ - if (caller) - { - cfun->machine->callee_pass_avx256_p = false; - cfun->machine->callee_return_avx256_p = false; - } - if (fndecl) { i = cgraph_local_info (fndecl); cum->call_abi = ix86_function_abi (fndecl); - fnret_type = TREE_TYPE (TREE_TYPE (fndecl)); } else { i = NULL; cum->call_abi = ix86_function_type_abi (fntype); - if (fntype) - fnret_type = TREE_TYPE (fntype); - else - fnret_type = NULL; - } - - if (TARGET_VZEROUPPER && fnret_type) - { - rtx fnret_value = ix86_function_value (fnret_type, fntype, - false); - if (function_pass_avx256_p (fnret_value)) - { - /* The return value of this function uses 256bit AVX modes. */ - if (caller) - cfun->machine->callee_return_avx256_p = true; - else - cfun->machine->caller_return_avx256_p = true; - } } cum->caller = caller; @@ -7197,15 +6727,6 @@ ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode, else arg = function_arg_32 (cum, mode, omode, type, bytes, words); - if (TARGET_VZEROUPPER && function_pass_avx256_p (arg)) - { - /* This argument uses 256bit AVX modes. */ - if (cum->caller) - cfun->machine->callee_pass_avx256_p = true; - else - cfun->machine->caller_pass_avx256_p = true; - } - return arg; } @@ -11044,17 +10565,6 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, } } -/* Emit vzeroupper if needed. */ - -void -ix86_maybe_emit_epilogue_vzeroupper (void) -{ - if (TARGET_VZEROUPPER - && !TREE_THIS_VOLATILE (cfun->decl) - && !cfun->machine->caller_return_avx256_p) - emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256))); -} - /* Restore function stack, frame, and registers. */ void @@ -11356,9 +10866,6 @@ ix86_expand_epilogue (int style) return; } - /* Emit vzeroupper if needed. */ - ix86_maybe_emit_epilogue_vzeroupper (); - if (crtl->args.pops_args && crtl->args.size) { rtx popc = GEN_INT (crtl->args.pops_args); @@ -15455,8 +14962,46 @@ output_387_binary_op (rtx insn, rtx *operands) /* Return needed mode for entity in optimize_mode_switching pass. */ -int -ix86_mode_needed (int entity, rtx insn) +static int +ix86_avx_u128_mode_needed (rtx insn) +{ + rtx pat = PATTERN (insn); + rtx arg; + enum upper_128bits_state state; + + if (CALL_P (insn)) + { + /* Needed mode is set to AVX_U128_CLEAN if there are + no 256bit modes used in function arguments. */ + for (arg = CALL_INSN_FUNCTION_USAGE (insn); arg; + arg = XEXP (arg, 1)) + { + if (GET_CODE (XEXP (arg, 0)) == USE) + { + rtx reg = XEXP (XEXP (arg, 0), 0); + + if (reg && REG_P (reg) + && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (reg))) + return AVX_U128_ANY; + } + } + + return AVX_U128_CLEAN; + } + + /* Check if a 256bit AVX register is referenced in stores. */ + state = unused; + note_stores (pat, check_avx256_stores, &state); + if (state == used) + return AVX_U128_DIRTY; + return AVX_U128_ANY; +} + +/* Return mode that i387 must be switched into + prior to the execution of insn. */ + +static int +ix86_i387_mode_needed (int entity, rtx insn) { enum attr_i387_cw mode; @@ -15505,11 +15050,166 @@ ix86_mode_needed (int entity, rtx insn) return I387_CW_ANY; } +/* Return mode that entity must be switched into + prior to the execution of insn. */ + +int +ix86_mode_needed (int entity, rtx insn) +{ + switch (entity) + { + case AVX_U128: + return ix86_avx_u128_mode_needed (insn); + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + return ix86_i387_mode_needed (entity, insn); + default: + gcc_unreachable (); + } + return 0; +} + +/* Calculate mode of upper 128bit AVX registers after the insn. */ + +static int +ix86_avx_u128_mode_after (int mode, rtx insn) +{ + rtx pat = PATTERN (insn); + rtx reg = NULL; + int i; + enum upper_128bits_state state; + + /* Check for CALL instruction. */ + if (CALL_P (insn)) + { + if (GET_CODE (pat) == SET || GET_CODE (pat) == CALL) + reg = SET_DEST (pat); + else if (GET_CODE (pat) == PARALLEL) + for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) + { + rtx x = XVECEXP (pat, 0, i); + if (GET_CODE(x) == SET) + reg = SET_DEST (x); + } + /* Mode after call is set to AVX_U128_DIRTY if there are + 256bit modes used in the function return register. */ + if (reg && REG_P (reg) && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (reg))) + return AVX_U128_DIRTY; + else + return AVX_U128_CLEAN; + } + + if (vzeroupper_operation (pat, VOIDmode) + || vzeroall_operation (pat, VOIDmode)) + return AVX_U128_CLEAN; + + /* Check if a 256bit AVX register is referenced in stores. */ + state = unused; + note_stores (pat, check_avx256_stores, &state); + if (state == used) + return AVX_U128_DIRTY; + + return mode; +} + +/* Return the mode that an insn results in. */ + +int +ix86_mode_after (int entity, int mode, rtx insn) +{ + switch (entity) + { + case AVX_U128: + return ix86_avx_u128_mode_after (mode, insn); + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + return mode; + default: + gcc_unreachable (); + } +} + +static int +ix86_avx_u128_mode_entry (void) +{ + tree arg; + + /* Entry mode is set to AVX_U128_DIRTY if there are + 256bit modes used in function arguments. */ + for (arg = DECL_ARGUMENTS (current_function_decl); arg; + arg = TREE_CHAIN (arg)) + { + rtx reg = DECL_INCOMING_RTL (arg); + + if (reg && REG_P (reg) && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (reg))) + return AVX_U128_DIRTY; + } + + return AVX_U128_CLEAN; +} + +/* Return a mode that ENTITY is assumed to be + switched to at function entry. */ + +int +ix86_mode_entry (int entity) +{ + switch (entity) + { + case AVX_U128: + return ix86_avx_u128_mode_entry (); + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + return I387_CW_ANY; + default: + gcc_unreachable (); + } +} + +static int +ix86_avx_u128_mode_exit (void) +{ + rtx reg = crtl->return_rtx; + + /* Exit mode is set to AVX_U128_DIRTY if there are + 256bit modes used in the function return register. */ + if (reg && REG_P (reg) && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (reg))) + return AVX_U128_DIRTY; + + return AVX_U128_CLEAN; +} + +/* Return a mode that ENTITY is assumed to be + switched to at function exit. */ + +int +ix86_mode_exit (int entity) +{ + switch (entity) + { + case AVX_U128: + return ix86_avx_u128_mode_exit (); + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + return I387_CW_ANY; + default: + gcc_unreachable (); + } +} + /* Output code to initialize control word copies used by trunc?f?i and rounding patterns. CURRENT_MODE is set to current control word, while NEW_MODE is set to new control word. */ -void +static void emit_i387_cw_initialization (int mode) { rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); @@ -15596,6 +15296,30 @@ emit_i387_cw_initialization (int mode) emit_move_insn (new_mode, reg); } +/* Generate one or more insns to set ENTITY to MODE. */ + +void +ix86_emit_mode_set (int entity, int mode) +{ + switch (entity) + { + case AVX_U128: + if (mode == AVX_U128_CLEAN) + emit_insn (gen_avx_vzeroupper ()); + break; + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + if (mode != I387_CW_ANY + && mode != I387_CW_UNINITIALIZED) + emit_i387_cw_initialization (mode); + break; + default: + gcc_unreachable (); + } +} + /* Output code for INSN to convert a float to a signed int. OPERANDS are the insn operands. The output may be [HSD]Imode and the input operand may be [SDX]Fmode. */ @@ -23604,30 +23328,6 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, clobbered_registers[i])); } - /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */ - if (TARGET_VZEROUPPER) - { - int avx256; - if (cfun->machine->callee_pass_avx256_p) - { - if (cfun->machine->callee_return_avx256_p) - avx256 = callee_return_pass_avx256; - else - avx256 = callee_pass_avx256; - } - else if (cfun->machine->callee_return_avx256_p) - avx256 = callee_return_avx256; - else - avx256 = call_no_avx256; - - if (reload_completed) - emit_insn (gen_avx_vzeroupper (GEN_INT (avx256))); - else - vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, - gen_rtvec (1, GEN_INT (avx256)), - UNSPEC_CALL_NEEDS_VZEROUPPER); - } - if (vec_len > 1) call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec)); call = emit_call_insn (call); @@ -23637,25 +23337,6 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, return call; } -void -ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper) -{ - rtx pat = PATTERN (insn); - rtvec vec = XVEC (pat, 0); - int len = GET_NUM_ELEM (vec) - 1; - - /* Strip off the last entry of the parallel. */ - gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC); - gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER); - if (len == 1) - pat = RTVEC_ELT (vec, 0); - else - pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0))); - - emit_insn (gen_avx_vzeroupper (vzeroupper)); - emit_call_insn (pat); -} - /* Output the assembly for a call instruction. */ const char * @@ -23736,6 +23417,7 @@ ix86_init_machine_status (void) f->use_fast_prologue_epilogue_nregs = -1; f->tls_descriptor_call_expanded_p = 0; f->call_abi = ix86_abi; + f->optimize_mode_switching[AVX_U128] = TARGET_VZEROUPPER; return f; } @@ -31137,8 +30819,6 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, switch ((enum ix86_builtin_func_type) d->flag) { case VOID_FTYPE_VOID: - if (icode == CODE_FOR_avx_vzeroupper) - target = GEN_INT (vzeroupper_intrinsic); emit_insn (GEN_FCN (icode) (target)); return 0; case VOID_FTYPE_UINT64: @@ -35372,10 +35052,6 @@ ix86_reorg (void) with old MDEP_REORGS that are not CFG based. Recompute it now. */ compute_bb_for_insn (); - /* Run the vzeroupper optimization if needed. */ - if (TARGET_VZEROUPPER) - move_or_delete_vzeroupper (); - if (optimize && optimize_function_for_speed_p (cfun)) { if (TARGET_PAD_SHORT_FUNCTION) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 712d00a..67403c5 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1035,6 +1035,9 @@ enum target_cpu_default || (MODE) == V4DImode || (MODE) == V2TImode || (MODE) == V8SFmode \ || (MODE) == V4DFmode) +#define VALID_AVX256_REG_OR_OI_MODE(MODE) \ + (VALID_AVX256_REG_MODE (MODE) || (MODE) == OImode) + #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ || (MODE) == V2DImode || (MODE) == DFmode) @@ -2141,7 +2144,8 @@ enum ix86_fpcmp_strategy { enum ix86_entity { - I387_TRUNC = 0, + AVX_U128 = 0, + I387_TRUNC, I387_FLOOR, I387_CEIL, I387_MASK_PM, @@ -2160,6 +2164,13 @@ enum ix86_stack_slot MAX_386_STACK_LOCALS }; +enum avx_u128_state +{ + AVX_U128_CLEAN, + AVX_U128_DIRTY, + AVX_U128_ANY +}; + /* Define this macro if the port needs extra instructions inserted for mode switching in an optimizing compilation. */ @@ -2175,16 +2186,34 @@ enum ix86_stack_slot refer to the mode-switched entity in question. */ #define NUM_MODES_FOR_MODE_SWITCHING \ - { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY } + { AVX_U128_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY } /* ENTITY is an integer specifying a mode-switched entity. If `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to return an integer value not larger than the corresponding element in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY - must be switched into prior to the execution of INSN. */ + must be switched into prior to the execution of INSN. */ #define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I)) +/* If this macro is defined, it is evaluated for every INSN during + mode switching. It determines the mode that an insn results in (if + different from the incoming mode). */ + +#define MODE_AFTER(ENTITY, MODE, I) ix86_mode_after ((ENTITY), (MODE), (I)) + +/* If this macro is defined, it is evaluated for every ENTITY that + needs mode switching. It should evaluate to an integer, which is + a mode that ENTITY is assumed to be switched to at function entry. */ + +#define MODE_ENTRY(ENTITY) ix86_mode_entry (ENTITY) + +/* If this macro is defined, it is evaluated for every ENTITY that + needs mode switching. It should evaluate to an integer, which is + a mode that ENTITY is assumed to be switched to at function exit. */ + +#define MODE_EXIT(ENTITY) ix86_mode_exit (ENTITY) + /* This macro specifies the order in which modes for ENTITY are processed. 0 is the highest priority. */ @@ -2194,11 +2223,8 @@ enum ix86_stack_slot is the set of hard registers live at the point where the insn(s) are to be inserted. */ -#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ - ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \ - ? emit_i387_cw_initialization (MODE), 0 \ - : 0) - +#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ + ix86_emit_mode_set ((ENTITY), (MODE)) /* Avoid renaming of stack registers, as doing so in combination with scheduling just increases amount of live registers at time and in @@ -2299,21 +2325,6 @@ struct GTY(()) machine_function { stack below the return address. */ BOOL_BITFIELD static_chain_on_stack : 1; - /* Nonzero if caller passes 256bit AVX modes. */ - BOOL_BITFIELD caller_pass_avx256_p : 1; - - /* Nonzero if caller returns 256bit AVX modes. */ - BOOL_BITFIELD caller_return_avx256_p : 1; - - /* Nonzero if the current callee passes 256bit AVX modes. */ - BOOL_BITFIELD callee_pass_avx256_p : 1; - - /* Nonzero if the current callee returns 256bit AVX modes. */ - BOOL_BITFIELD callee_return_avx256_p : 1; - - /* Nonzero if rescan vzerouppers in the current function is needed. */ - BOOL_BITFIELD rescan_vzeroupper_p : 1; - /* During prologue/epilogue generation, the current frame state. Otherwise, the frame state at the end of the prologue. */ struct machine_frame_state fs; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 61d3ccd..f2d2cd6 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -109,7 +109,6 @@ UNSPEC_TRUNC_NOOP UNSPEC_DIV_ALREADY_SPLIT UNSPEC_MS_TO_SYSV_CALL - UNSPEC_CALL_NEEDS_VZEROUPPER UNSPEC_PAUSE UNSPEC_LEA_ADDR UNSPEC_XBEGIN_ABORT @@ -11503,18 +11502,6 @@ DONE; }) -(define_insn_and_split "*call_vzeroupper" - [(call (mem:QI (match_operand:W 0 "call_insn_operand" "zw")) - (match_operand 1)) - (unspec [(match_operand 2 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && !SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" - [(set_attr "type" "call")]) - (define_insn "*call" [(call (mem:QI (match_operand:W 0 "call_insn_operand" "zw")) (match_operand 1))] @@ -11522,31 +11509,6 @@ "* return ix86_output_call_insn (insn, operands[0]);" [(set_attr "type" "call")]) -(define_insn_and_split "*call_rex64_ms_sysv_vzeroupper" - [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzw")) - (match_operand 1)) - (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) - (clobber (reg:TI XMM6_REG)) - (clobber (reg:TI XMM7_REG)) - (clobber (reg:TI XMM8_REG)) - (clobber (reg:TI XMM9_REG)) - (clobber (reg:TI XMM10_REG)) - (clobber (reg:TI XMM11_REG)) - (clobber (reg:TI XMM12_REG)) - (clobber (reg:TI XMM13_REG)) - (clobber (reg:TI XMM14_REG)) - (clobber (reg:TI XMM15_REG)) - (clobber (reg:DI SI_REG)) - (clobber (reg:DI DI_REG)) - (unspec [(match_operand 2 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" - [(set_attr "type" "call")]) - (define_insn "*call_rex64_ms_sysv" [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzw")) (match_operand 1)) @@ -11567,18 +11529,6 @@ "* return ix86_output_call_insn (insn, operands[0]);" [(set_attr "type" "call")]) -(define_insn_and_split "*sibcall_vzeroupper" - [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "Uz")) - (match_operand 1)) - (unspec [(match_operand 2 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" - [(set_attr "type" "call")]) - (define_insn "*sibcall" [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "Uz")) (match_operand 1))] @@ -11599,21 +11549,6 @@ DONE; }) -(define_insn_and_split "*call_pop_vzeroupper" - [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) - (match_operand 1)) - (set (reg:SI SP_REG) - (plus:SI (reg:SI SP_REG) - (match_operand:SI 2 "immediate_operand" "i"))) - (unspec [(match_operand 3 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" - [(set_attr "type" "call")]) - (define_insn "*call_pop" [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) (match_operand 1)) @@ -11624,21 +11559,6 @@ "* return ix86_output_call_insn (insn, operands[0]);" [(set_attr "type" "call")]) -(define_insn_and_split "*sibcall_pop_vzeroupper" - [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "Uz")) - (match_operand 1)) - (set (reg:SI SP_REG) - (plus:SI (reg:SI SP_REG) - (match_operand:SI 2 "immediate_operand" "i"))) - (unspec [(match_operand 3 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" - [(set_attr "type" "call")]) - (define_insn "*sibcall_pop" [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "Uz")) (match_operand 1)) @@ -11675,19 +11595,6 @@ DONE; }) -(define_insn_and_split "*call_value_vzeroupper" - [(set (match_operand 0) - (call (mem:QI (match_operand:W 1 "call_insn_operand" "zw")) - (match_operand 2))) - (unspec [(match_operand 3 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && !SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" - [(set_attr "type" "callv")]) - (define_insn "*call_value" [(set (match_operand 0) (call (mem:QI (match_operand:W 1 "call_insn_operand" "zw")) @@ -11696,19 +11603,6 @@ "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) -(define_insn_and_split "*sibcall_value_vzeroupper" - [(set (match_operand 0) - (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "Uz")) - (match_operand 2))) - (unspec [(match_operand 3 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" - [(set_attr "type" "callv")]) - (define_insn "*sibcall_value" [(set (match_operand 0) (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "Uz")) @@ -11717,32 +11611,6 @@ "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) -(define_insn_and_split "*call_value_rex64_ms_sysv_vzeroupper" - [(set (match_operand 0) - (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzw")) - (match_operand 2))) - (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) - (clobber (reg:TI XMM6_REG)) - (clobber (reg:TI XMM7_REG)) - (clobber (reg:TI XMM8_REG)) - (clobber (reg:TI XMM9_REG)) - (clobber (reg:TI XMM10_REG)) - (clobber (reg:TI XMM11_REG)) - (clobber (reg:TI XMM12_REG)) - (clobber (reg:TI XMM13_REG)) - (clobber (reg:TI XMM14_REG)) - (clobber (reg:TI XMM15_REG)) - (clobber (reg:DI SI_REG)) - (clobber (reg:DI DI_REG)) - (unspec [(match_operand 3 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" - [(set_attr "type" "callv")]) - (define_insn "*call_value_rex64_ms_sysv" [(set (match_operand 0) (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzw")) @@ -11778,22 +11646,6 @@ DONE; }) -(define_insn_and_split "*call_value_pop_vzeroupper" - [(set (match_operand 0) - (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm")) - (match_operand 2))) - (set (reg:SI SP_REG) - (plus:SI (reg:SI SP_REG) - (match_operand:SI 3 "immediate_operand" "i"))) - (unspec [(match_operand 4 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;" - [(set_attr "type" "callv")]) - (define_insn "*call_value_pop" [(set (match_operand 0) (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm")) @@ -11805,22 +11657,6 @@ "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) -(define_insn_and_split "*sibcall_value_pop_vzeroupper" - [(set (match_operand 0) - (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "Uz")) - (match_operand 2))) - (set (reg:SI SP_REG) - (plus:SI (reg:SI SP_REG) - (match_operand:SI 3 "immediate_operand" "i"))) - (unspec [(match_operand 4 "const_int_operand")] - UNSPEC_CALL_NEEDS_VZEROUPPER)] - "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;" - [(set_attr "type" "callv")]) - (define_insn "*sibcall_value_pop" [(set (match_operand 0) (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "Uz")) @@ -11922,7 +11758,6 @@ [(simple_return)] "ix86_can_use_return_insn_p ()" { - ix86_maybe_emit_epilogue_vzeroupper (); if (crtl->args.pops_args) { rtx popc = GEN_INT (crtl->args.pops_args); @@ -11939,7 +11774,6 @@ [(simple_return)] "!TARGET_SEH" { - ix86_maybe_emit_epilogue_vzeroupper (); if (crtl->args.pops_args) { rtx popc = GEN_INT (crtl->args.pops_args); diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 830c740f2..e108553 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1229,6 +1229,13 @@ return true; }) +;; return true if OP is a vzeroupper operation. +(define_predicate "vzeroupper_operation" + (match_code "unspec_volatile") +{ + return XINT (op, 1) == UNSPECV_VZEROUPPER; +}) + ;; Return true if OP is a parallel for a vbroadcast permute. (define_predicate "avx_vbroadcast_operand" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d6cf7f3..33d7b6b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10452,8 +10452,7 @@ ;; Clear the upper 128bits of AVX registers, equivalent to a NOP ;; if the upper 128bits are unused. (define_insn "avx_vzeroupper" - [(unspec_volatile [(match_operand 0 "const_int_operand")] - UNSPECV_VZEROUPPER)] + [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)] "TARGET_AVX" "vzeroupper" [(set_attr "type" "sse") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c4d388d..2108e77 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2012-11-06 Vladimir Yakovlev + + * gcc.target/i386/avx-vzeroupper-5.c: Changed scan-assembler-times. + * gcc.target/i386/avx-vzeroupper-8.c: Likewise. + * gcc.target/i386/avx-vzeroupper-9.c: Likewise. + * gcc.target/i386/avx-vzeroupper-10.c: Likewise. + * gcc.target/i386/avx-vzeroupper-11.c: Likewise. + * gcc.target/i386/avx-vzeroupper-12.c: Likewise. + * gcc.target/i386/avx-vzeroupper-19.c: Likewis. + * gcc.target/i386/avx-vzeroupper-27.c: New. + 2012-11-06 Janus Weil PR fortran/54917 diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-10.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-10.c index 667bb17..5007753 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-10.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-10.c @@ -14,4 +14,4 @@ foo () _mm256_zeroupper (); } -/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-11.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-11.c index d98ceb9..507f945 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-11.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-11.c @@ -16,4 +16,4 @@ foo () } /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */ -/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c index f74ea0c..e694d40 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c @@ -16,5 +16,5 @@ foo () _mm256_zeroupper (); } -/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c index 602de87..ae2f861 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c @@ -14,4 +14,4 @@ void feat_s3_cep_dcep (int cepsize_used, float **mfc, float **feat) f[i] = w[i] - _w[i]; } -/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-27.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-27.c new file mode 100644 index 0000000..7fa5de4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-27.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx -mtune=generic -dp" } */ + +typedef struct objc_class *Class; +typedef struct objc_object +{ + Class class_pointer; +} *id; + +typedef const struct objc_selector *SEL; +typedef void * retval_t; +typedef void * arglist_t; + +extern retval_t __objc_forward (id object, SEL sel, arglist_t args); + +double +__objc_double_forward (id rcv, SEL op, ...) +{ + void *args, *res; + + args = __builtin_apply_args (); + res = __objc_forward (rcv, op, args); + __builtin_return (res); +} + +/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-5.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-5.c index 0f54602..ba08978 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-5.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-5.c @@ -14,4 +14,4 @@ foo () _mm256_zeroupper (); } -/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-8.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-8.c index 0a821c2..bb370c5 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-8.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-8.c @@ -13,4 +13,4 @@ foo () _mm256_zeroupper (); } -/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c index 5aa05b8..974e162 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c @@ -15,4 +15,4 @@ foo () _mm256_zeroupper (); } -/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ -- 2.7.4