/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"

enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
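
/* A minimal sketch of how the callback above is driven (assuming only
   the standard note_stores interface): for each store in a pattern,
   note_stores hands the destination and the enclosing SET to the
   callback, so a 256bit write such as

       (set (reg:V8SF 21 xmm0) (mem:V8SF (reg:DI 0 ax)))

   reaches check_avx256_stores with DEST = (reg:V8SF 21 xmm0), whose
   V8SFmode satisfies VALID_AVX256_REG_MODE, and *STATE flips to `used':

       enum upper_128bits_state state = unused;
       note_stores (PATTERN (insn), check_avx256_stores, &state);  */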

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
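
/* An assumed example (not taken from real dump output) of the scan
   above on a block entered with STATE == unused:

       vmovaps %ymm0, (%rax)    <- 256bit store, state becomes `used'
       vzeroupper               <- kept, recorded in vzeroupper_insn
       call foo                 <- pending vzeroupper moved just before

   Had there been no 256bit reference ahead of it, the vzeroupper would
   instead have been deleted as redundant.  */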

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
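
/* Note the fixed-point flavor of this propagation: a block fed only by
   `unknown' predecessors stays unprocessed (unless UNKNOWN_IS_UNUSED
   lets the final pass fold `unknown' into `unused'), and a block whose
   exit state moves to `used' raises rescan_vzeroupper_p so the driver
   below runs another round over the successors.  */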

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  bitmap_clear (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  bitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      bitmap_clear (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
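
/* For orientation, the driver above reduces to this shape (a sketch
   only, with the real bookkeeping elided):

       while (!fibheap_empty (pending))
         {
           swap (worklist, pending);  swap (in_worklist, in_pending);
           while (!fibheap_empty (worklist))
             move_or_delete_vzeroupper_1 (extract_min (worklist), false);
           if (!cfun->machine->rescan_vzeroupper_p)
             break;
         }

   followed by a last FOR_EACH_BB sweep with UNKNOWN_IS_UNUSED set.  */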

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
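
/* A worked example of how the macros above combine (using the
   ix86_size_cost entries below): MODE_INDEX (SImode) is 2, so the cost
   of starting an SImode multiply is the third entry of the
   multiply-start array, COSTS_N_BYTES (3) == 6, i.e. the size of three
   2-byte additions under the size metric.  DUMMY_STRINGOP_ALGS merely
   pads the 64bit slot of the memcpy/memset tables on 32bit-only
   processors.  */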

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*                               HI */
   COSTS_N_BYTES (3),			/*                               SI */
   COSTS_N_BYTES (3),			/*                               DI */
   COSTS_N_BYTES (5)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*                          HI */
   COSTS_N_BYTES (3),			/*                          SI */
   COSTS_N_BYTES (3),			/*                          DI */
   COSTS_N_BYTES (5)},			/*                          other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
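
/* Reading the two string-operation tables that end each entry: the
   first aggregate describes memcpy, the second memset; within each,
   the first row is used for 32bit code and the second for 64bit code.
   The leading element gives the algorithm when the size is not known
   at compile time, and a {-1, alg} pair terminates the size-dispatch
   list, meaning "use ALG for every remaining size".  Under -Os,
   rep_prefix_1_byte wins everywhere above simply because
   "rep movsb"/"rep stosb" is the shortest encoding.  */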

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*                               HI */
   COSTS_N_INSNS (6),			/*                               SI */
   COSTS_N_INSNS (6),			/*                               DI */
   COSTS_N_INSNS (6)},			/*                            other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*                          HI */
   COSTS_N_INSNS (23),			/*                          SI */
   COSTS_N_INSNS (23),			/*                          DI */
   COSTS_N_INSNS (23)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*                               HI */
   COSTS_N_INSNS (12),			/*                               SI */
   COSTS_N_INSNS (12),			/*                               DI */
   COSTS_N_INSNS (12)},			/*                            other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*                          HI */
   COSTS_N_INSNS (40),			/*                          SI */
   COSTS_N_INSNS (40),			/*                          DI */
   COSTS_N_INSNS (40)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*                               HI */
   COSTS_N_INSNS (11),			/*                               SI */
   COSTS_N_INSNS (11),			/*                               DI */
   COSTS_N_INSNS (11)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*                          HI */
   COSTS_N_INSNS (25),			/*                          SI */
   COSTS_N_INSNS (25),			/*                          DI */
   COSTS_N_INSNS (25)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (4),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (4)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*                          HI */
   COSTS_N_INSNS (17),			/*                          SI */
   COSTS_N_INSNS (17),			/*                          DI */
   COSTS_N_INSNS (17)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (7),			/*                               SI */
   COSTS_N_INSNS (7),			/*                               DI */
   COSTS_N_INSNS (7)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*                          HI */
   COSTS_N_INSNS (39),			/*                          SI */
   COSTS_N_INSNS (39),			/*                          DI */
   COSTS_N_INSNS (39)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (3),			/*                               DI */
   COSTS_N_INSNS (3)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*                          HI */
   COSTS_N_INSNS (18),			/*                          SI */
   COSTS_N_INSNS (18),			/*                          DI */
   COSTS_N_INSNS (18)},			/*                          other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*                               HI */
   COSTS_N_INSNS (5),			/*                               SI */
   COSTS_N_INSNS (5),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8.  Alignment becomes important after 8 bytes for memcpy
     and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*                          HI */
   COSTS_N_INSNS (51),			/*                          SI */
   COSTS_N_INSNS (83),			/*                          DI */
   COSTS_N_INSNS (83)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (4),			/*                               SI */
   COSTS_N_INSNS (6),			/*                               DI */
   COSTS_N_INSNS (6)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*                          HI */
   COSTS_N_INSNS (51),			/*                          SI */
   COSTS_N_INSNS (83),			/*                          DI */
   COSTS_N_INSNS (83)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (4),			/*                               SI */
   COSTS_N_INSNS (6),			/*                               DI */
   COSTS_N_INSNS (6)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*                          HI */
   COSTS_N_INSNS (51),			/*                          SI */
   COSTS_N_INSNS (83),			/*                          DI */
   COSTS_N_INSNS (83)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER2 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*                          HI */
   COSTS_N_INSNS (51),			/*                          SI */
   COSTS_N_INSNS (83),			/*                          DI */
   COSTS_N_INSNS (83)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* BTVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

struct processor_costs btver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*                          HI */
   COSTS_N_INSNS (51),			/*                          SI */
   COSTS_N_INSNS (83),			/*                          DI */
   COSTS_N_INSNS (83)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  32,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/*                               HI */
   COSTS_N_INSNS (15),			/*                               SI */
   COSTS_N_INSNS (15),			/*                               DI */
   COSTS_N_INSNS (15)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/*                          HI */
   COSTS_N_INSNS (56),			/*                          SI */
   COSTS_N_INSNS (56),			/*                          DI */
   COSTS_N_INSNS (56)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
1663 struct processor_costs nocona_cost = {
1664 COSTS_N_INSNS (1), /* cost of an add instruction */
1665 COSTS_N_INSNS (1), /* cost of a lea instruction */
1666 COSTS_N_INSNS (1), /* variable shift costs */
1667 COSTS_N_INSNS (1), /* constant shift costs */
1668 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1669 COSTS_N_INSNS (10), /* HI */
1670 COSTS_N_INSNS (10), /* SI */
1671 COSTS_N_INSNS (10), /* DI */
1672 COSTS_N_INSNS (10)}, /* other */
1673 0, /* cost of multiply per each bit set */
1674 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1675 COSTS_N_INSNS (66), /* HI */
1676 COSTS_N_INSNS (66), /* SI */
1677 COSTS_N_INSNS (66), /* DI */
1678 COSTS_N_INSNS (66)}, /* other */
1679 COSTS_N_INSNS (1), /* cost of movsx */
1680 COSTS_N_INSNS (1), /* cost of movzx */
1681 16, /* "large" insn */
1682 17, /* MOVE_RATIO */
1683 4, /* cost for loading QImode using movzbl */
1684 {4, 4, 4}, /* cost of loading integer registers
1685 in QImode, HImode and SImode.
1686 Relative to reg-reg move (2). */
1687 {4, 4, 4}, /* cost of storing integer registers */
1688 3, /* cost of reg,reg fld/fst */
1689 {12, 12, 12}, /* cost of loading fp registers
1690 in SFmode, DFmode and XFmode */
1691 {4, 4, 4}, /* cost of storing fp registers
1692 in SFmode, DFmode and XFmode */
1693 6, /* cost of moving MMX register */
1694 {12, 12}, /* cost of loading MMX registers
1695 in SImode and DImode */
1696 {12, 12}, /* cost of storing MMX registers
1697 in SImode and DImode */
1698 6, /* cost of moving SSE register */
1699 {12, 12, 12}, /* cost of loading SSE registers
1700 in SImode, DImode and TImode */
1701 {12, 12, 12}, /* cost of storing SSE registers
1702 in SImode, DImode and TImode */
1703 8, /* MMX or SSE register to integer */
1704 8, /* size of l1 cache. */
1705 1024, /* size of l2 cache. */
1706 128, /* size of prefetch block */
1707 8, /* number of parallel prefetches */
1708 1, /* Branch cost */
1709 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1710 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1711 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1712 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1713 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1714 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1715 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1716 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1717 {100000, unrolled_loop}, {-1, libcall}}}},
1718 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1719 {-1, libcall}}},
1720 {libcall, {{24, loop}, {64, unrolled_loop},
1721 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1722 1, /* scalar_stmt_cost. */
1723 1, /* scalar load_cost. */
1724 1, /* scalar_store_cost. */
1725 1, /* vec_stmt_cost. */
1726 1, /* vec_to_scalar_cost. */
1727 1, /* scalar_to_vec_cost. */
1728 1, /* vec_align_load_cost. */
1729 2, /* vec_unalign_load_cost. */
1730 1, /* vec_store_cost. */
1731 3, /* cond_taken_branch_cost. */
1732 1, /* cond_not_taken_branch_cost. */
1736 struct processor_costs atom_cost = {
1737 COSTS_N_INSNS (1), /* cost of an add instruction */
1738 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1739 COSTS_N_INSNS (1), /* variable shift costs */
1740 COSTS_N_INSNS (1), /* constant shift costs */
1741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1742 COSTS_N_INSNS (4), /* HI */
1743 COSTS_N_INSNS (3), /* SI */
1744 COSTS_N_INSNS (4), /* DI */
1745 COSTS_N_INSNS (2)}, /* other */
1746 0, /* cost of multiply per each bit set */
1747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1748 COSTS_N_INSNS (26), /* HI */
1749 COSTS_N_INSNS (42), /* SI */
1750 COSTS_N_INSNS (74), /* DI */
1751 COSTS_N_INSNS (74)}, /* other */
1752 COSTS_N_INSNS (1), /* cost of movsx */
1753 COSTS_N_INSNS (1), /* cost of movzx */
1754 8, /* "large" insn */
1755 17, /* MOVE_RATIO */
1756 4, /* cost for loading QImode using movzbl */
1757 {4, 4, 4}, /* cost of loading integer registers
1758 in QImode, HImode and SImode.
1759 Relative to reg-reg move (2). */
1760 {4, 4, 4}, /* cost of storing integer registers */
1761 4, /* cost of reg,reg fld/fst */
1762 {12, 12, 12}, /* cost of loading fp registers
1763 in SFmode, DFmode and XFmode */
1764 {6, 6, 8}, /* cost of storing fp registers
1765 in SFmode, DFmode and XFmode */
1766 2, /* cost of moving MMX register */
1767 {8, 8}, /* cost of loading MMX registers
1768 in SImode and DImode */
1769 {8, 8}, /* cost of storing MMX registers
1770 in SImode and DImode */
1771 2, /* cost of moving SSE register */
1772 {8, 8, 8}, /* cost of loading SSE registers
1773 in SImode, DImode and TImode */
1774 {8, 8, 8}, /* cost of storing SSE registers
1775 in SImode, DImode and TImode */
1776 5, /* MMX or SSE register to integer */
1777 32, /* size of l1 cache. */
1778 256, /* size of l2 cache. */
1779 64, /* size of prefetch block */
1780 6, /* number of parallel prefetches */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1788 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1789 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1790 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 {{libcall, {{8, loop}, {15, unrolled_loop},
1792 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1793 {libcall, {{24, loop}, {32, unrolled_loop},
1794 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1795 1, /* scalar_stmt_cost. */
1796 1, /* scalar load_cost. */
1797 1, /* scalar_store_cost. */
1798 1, /* vec_stmt_cost. */
1799 1, /* vec_to_scalar_cost. */
1800 1, /* scalar_to_vec_cost. */
1801 1, /* vec_align_load_cost. */
1802 2, /* vec_unalign_load_cost. */
1803 1, /* vec_store_cost. */
1804 3, /* cond_taken_branch_cost. */
1805 1, /* cond_not_taken_branch_cost. */
1808 /* Generic64 should produce code tuned for Nocona and K8. */
1810 struct processor_costs generic64_cost = {
1811 COSTS_N_INSNS (1), /* cost of an add instruction */
1812 /* On all chips taken into consideration, lea is 2 cycles or more. With
1813 this cost, however, our current implementation of synth_mult results in
1814 the use of unnecessary temporary registers, causing regressions on several
1815 SPECfp benchmarks. */
1816 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1817 COSTS_N_INSNS (1), /* variable shift costs */
1818 COSTS_N_INSNS (1), /* constant shift costs */
1819 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1820 COSTS_N_INSNS (4), /* HI */
1821 COSTS_N_INSNS (3), /* SI */
1822 COSTS_N_INSNS (4), /* DI */
1823 COSTS_N_INSNS (2)}, /* other */
1824 0, /* cost of multiply per each bit set */
1825 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1826 COSTS_N_INSNS (26), /* HI */
1827 COSTS_N_INSNS (42), /* SI */
1828 COSTS_N_INSNS (74), /* DI */
1829 COSTS_N_INSNS (74)}, /* other */
1830 COSTS_N_INSNS (1), /* cost of movsx */
1831 COSTS_N_INSNS (1), /* cost of movzx */
1832 8, /* "large" insn */
1833 17, /* MOVE_RATIO */
1834 4, /* cost for loading QImode using movzbl */
1835 {4, 4, 4}, /* cost of loading integer registers
1836 in QImode, HImode and SImode.
1837 Relative to reg-reg move (2). */
1838 {4, 4, 4}, /* cost of storing integer registers */
1839 4, /* cost of reg,reg fld/fst */
1840 {12, 12, 12}, /* cost of loading fp registers
1841 in SFmode, DFmode and XFmode */
1842 {6, 6, 8}, /* cost of storing fp registers
1843 in SFmode, DFmode and XFmode */
1844 2, /* cost of moving MMX register */
1845 {8, 8}, /* cost of loading MMX registers
1846 in SImode and DImode */
1847 {8, 8}, /* cost of storing MMX registers
1848 in SImode and DImode */
1849 2, /* cost of moving SSE register */
1850 {8, 8, 8}, /* cost of loading SSE registers
1851 in SImode, DImode and TImode */
1852 {8, 8, 8}, /* cost of storing SSE registers
1853 in SImode, DImode and TImode */
1854 5, /* MMX or SSE register to integer */
1855 32, /* size of l1 cache. */
1856 512, /* size of l2 cache. */
1857 64, /* size of prefetch block */
1858 6, /* number of parallel prefetches */
1859 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1860 value is increased to the perhaps more appropriate value of 5. */
1861 3, /* Branch cost */
1862 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1863 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1864 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1865 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1866 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1867 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1868 {DUMMY_STRINGOP_ALGS,
1869 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1870 {DUMMY_STRINGOP_ALGS,
1871 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1872 1, /* scalar_stmt_cost. */
1873 1, /* scalar load_cost. */
1874 1, /* scalar_store_cost. */
1875 1, /* vec_stmt_cost. */
1876 1, /* vec_to_scalar_cost. */
1877 1, /* scalar_to_vec_cost. */
1878 1, /* vec_align_load_cost. */
1879 2, /* vec_unalign_load_cost. */
1880 1, /* vec_store_cost. */
1881 3, /* cond_taken_branch_cost. */
1882 1, /* cond_not_taken_branch_cost. */
1885 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1886 Athlon and K8. */
1888 struct processor_costs generic32_cost = {
1889 COSTS_N_INSNS (1), /* cost of an add instruction */
1890 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1891 COSTS_N_INSNS (1), /* variable shift costs */
1892 COSTS_N_INSNS (1), /* constant shift costs */
1893 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1894 COSTS_N_INSNS (4), /* HI */
1895 COSTS_N_INSNS (3), /* SI */
1896 COSTS_N_INSNS (4), /* DI */
1897 COSTS_N_INSNS (2)}, /* other */
1898 0, /* cost of multiply per each bit set */
1899 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1900 COSTS_N_INSNS (26), /* HI */
1901 COSTS_N_INSNS (42), /* SI */
1902 COSTS_N_INSNS (74), /* DI */
1903 COSTS_N_INSNS (74)}, /* other */
1904 COSTS_N_INSNS (1), /* cost of movsx */
1905 COSTS_N_INSNS (1), /* cost of movzx */
1906 8, /* "large" insn */
1907 17, /* MOVE_RATIO */
1908 4, /* cost for loading QImode using movzbl */
1909 {4, 4, 4}, /* cost of loading integer registers
1910 in QImode, HImode and SImode.
1911 Relative to reg-reg move (2). */
1912 {4, 4, 4}, /* cost of storing integer registers */
1913 4, /* cost of reg,reg fld/fst */
1914 {12, 12, 12}, /* cost of loading fp registers
1915 in SFmode, DFmode and XFmode */
1916 {6, 6, 8}, /* cost of storing fp registers
1917 in SFmode, DFmode and XFmode */
1918 2, /* cost of moving MMX register */
1919 {8, 8}, /* cost of loading MMX registers
1920 in SImode and DImode */
1921 {8, 8}, /* cost of storing MMX registers
1922 in SImode and DImode */
1923 2, /* cost of moving SSE register */
1924 {8, 8, 8}, /* cost of loading SSE registers
1925 in SImode, DImode and TImode */
1926 {8, 8, 8}, /* cost of storing SSE registers
1927 in SImode, DImode and TImode */
1928 5, /* MMX or SSE register to integer */
1929 32, /* size of l1 cache. */
1930 256, /* size of l2 cache. */
1931 64, /* size of prefetch block */
1932 6, /* number of parallel prefetches */
1933 3, /* Branch cost */
1934 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1935 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1936 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1937 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1938 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1939 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1940 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1941 DUMMY_STRINGOP_ALGS},
1942 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1943 DUMMY_STRINGOP_ALGS},
1944 1, /* scalar_stmt_cost. */
1945 1, /* scalar load_cost. */
1946 1, /* scalar_store_cost. */
1947 1, /* vec_stmt_cost. */
1948 1, /* vec_to_scalar_cost. */
1949 1, /* scalar_to_vec_cost. */
1950 1, /* vec_align_load_cost. */
1951 2, /* vec_unalign_load_cost. */
1952 1, /* vec_store_cost. */
1953 3, /* cond_taken_branch_cost. */
1954 1, /* cond_not_taken_branch_cost. */
1957 /* Set by -mtune. */
1958 const struct processor_costs *ix86_tune_cost = &pentium_cost;
1960 /* Set by -mtune or -Os. */
1961 const struct processor_costs *ix86_cost = &pentium_cost;
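/* A minimal sketch (assumed; it mirrors what ix86_option_override_internal
   does once the tuning target is known): both pointers are re-seated from
   the processor target table, with -Os swapping in the size-oriented
   table:

     ix86_tune_cost = processor_target_table[ix86_tune].cost;
     ix86_cost = optimize_size ? &ix86_size_cost : ix86_tune_cost;
*/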
1963 /* Processor feature/optimization bitmasks. */
1964 #define m_386 (1<<PROCESSOR_I386)
1965 #define m_486 (1<<PROCESSOR_I486)
1966 #define m_PENT (1<<PROCESSOR_PENTIUM)
1967 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1968 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1969 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1970 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1971 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1972 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1973 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1974 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1975 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1976 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1977 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1978 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1979 #define m_ATOM (1<<PROCESSOR_ATOM)
1981 #define m_GEODE (1<<PROCESSOR_GEODE)
1982 #define m_K6 (1<<PROCESSOR_K6)
1983 #define m_K6_GEODE (m_K6 | m_GEODE)
1984 #define m_K8 (1<<PROCESSOR_K8)
1985 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1986 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1987 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1988 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1989 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1990 #define m_BDVER (m_BDVER1 | m_BDVER2)
1991 #define m_BTVER (m_BTVER1 | m_BTVER2)
1992 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1993 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1994 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
1996 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1997 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1999 /* Generic instruction choice should be a common subset of supported CPUs
2000 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
2001 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
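/* A minimal sketch of how the masks above are consumed (this mirrors the
   derivation done in ix86_option_override_internal): each entry of
   initial_ix86_tune_features below is a bitmask over processors, and a
   tuning is active when the bit for the selected -mtune CPU is set:

     unsigned int i, ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/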
2003 /* Feature tests against the various tunings. */
2004 unsigned char ix86_tune_features[X86_TUNE_LAST];
2006 /* Feature tests against the various tunings used to create ix86_tune_features
2007 based on the processor mask. */
2008 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
2010 negatively, so enabling it for Generic64 seems like a good code size
2011 tradeoff. We can't enable it for 32bit generic because it does not
2012 work well with PPro-based chips. */
2013 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
2015 /* X86_TUNE_PUSH_MEMORY */
2016 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2018 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
2021 /* X86_TUNE_UNROLL_STRLEN */
2022 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
2024 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
2025 on simulation results. But after P4 was made, no performance benefit
2026 was observed from branch hints; they also increase code size.
2027 As a result, icc never generates branch hints. */
2030 /* X86_TUNE_DOUBLE_WITH_ADD */
2033 /* X86_TUNE_USE_SAHF */
2034 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC,
2036 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
2037 partial dependencies. */
2038 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2040 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
2041 register stalls on the Generic32 compilation setting as well. However,
2042 in the current implementation partial register stalls are not eliminated
2043 very well - they can be introduced via subregs synthesized by combine
2044 and can happen in caller/callee saving sequences. Because this option
2045 pays back little on PPro-based chips and conflicts with the partial reg
2046 dependencies used by Athlon/P4-based chips, it is better to leave it off
2047 for generic32 for now. */
2050 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
2051 m_CORE2I7 | m_GENERIC,
2053 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
2054 on 16-bit immediate moves into memory on Core2 and Corei7. */
2055 m_CORE2I7 | m_GENERIC,
2057 /* X86_TUNE_USE_HIMODE_FIOP */
2058 m_386 | m_486 | m_K6_GEODE,
2060 /* X86_TUNE_USE_SIMODE_FIOP */
2061 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
2063 /* X86_TUNE_USE_MOV0 */
2066 /* X86_TUNE_USE_CLTD */
2067 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
2069 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
2072 /* X86_TUNE_SPLIT_LONG_MOVES */
2075 /* X86_TUNE_READ_MODIFY_WRITE */
2078 /* X86_TUNE_READ_MODIFY */
2081 /* X86_TUNE_PROMOTE_QIMODE */
2082 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2084 /* X86_TUNE_FAST_PREFIX */
2085 ~(m_386 | m_486 | m_PENT),
2087 /* X86_TUNE_SINGLE_STRINGOP */
2088 m_386 | m_P4_NOCONA,
2090 /* X86_TUNE_QIMODE_MATH */
2093 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2094 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2095 might be considered for Generic32 if our scheme for avoiding partial
2096 stalls were more effective. */
2099 /* X86_TUNE_PROMOTE_QI_REGS */
2102 /* X86_TUNE_PROMOTE_HI_REGS */
2105 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2106 over esp addition. */
2107 m_386 | m_486 | m_PENT | m_PPRO,
2109 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2110 over esp addition. */
2113 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2114 over esp subtraction. */
2115 m_386 | m_486 | m_PENT | m_K6_GEODE,
2117 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2118 over esp subtraction. */
2119 m_PENT | m_K6_GEODE,
2121 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2122 for DFmode copies */
2123 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2125 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2126 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2128 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2129 conflict here between PPro/Pentium4 based chips that treat 128bit
2130 SSE registers as single units and K8 based chips that divide SSE
2131 registers into two 64bit halves. This knob promotes all store destinations
2132 to 128bit to allow register renaming on 128bit SSE units, but usually
2133 results in one extra microop on 64bit SSE units. Experimental results
2134 show that disabling this option on P4 brings over a 20% SPECfp regression,
2135 while enabling it on K8 brings roughly a 2.4% regression that can be partly
2136 masked by careful scheduling of moves. */
2137 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2139 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2140 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER,
2142 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2145 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2149 are resolved on SSE register parts instead of whole registers, so we may
2150 maintain just the lower part of scalar values in the proper format, leaving the
2151 upper part undefined. */
2154 /* X86_TUNE_SSE_TYPELESS_STORES */
2157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2158 m_PPRO | m_P4_NOCONA,
2160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2161 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2163 /* X86_TUNE_PROLOGUE_USING_MOVE */
2164 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2166 /* X86_TUNE_EPILOGUE_USING_MOVE */
2167 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2169 /* X86_TUNE_SHIFT1 */
2172 /* X86_TUNE_USE_FFREEP */
2175 /* X86_TUNE_INTER_UNIT_MOVES */
2176 ~(m_AMD_MULTIPLE | m_GENERIC),
2178 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2179 ~(m_AMDFAM10 | m_BDVER),
2181 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2182 than 4 branch instructions in the 16 byte window. */
2183 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2185 /* X86_TUNE_SCHEDULE */
2186 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2188 /* X86_TUNE_USE_BT */
2189 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2191 /* X86_TUNE_USE_INCDEC */
2192 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2194 /* X86_TUNE_PAD_RETURNS */
2195 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2197 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
2200 /* X86_TUNE_EXT_80387_CONSTANTS */
2201 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2203 /* X86_TUNE_SHORTEN_X87_SSE */
2206 /* X86_TUNE_AVOID_VECTOR_DECODE */
2207 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2209 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
2210 HImode and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2213 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2214 vector path on AMD machines. */
2215 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,
2217 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2219 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,
2221 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2222 than a MOV. */
2225 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2226 but one byte longer. */
2229 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2230 operand that cannot be represented using a modRM byte. The XOR
2231 replacement is long decoded, so this split helps here as well. */
2234 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2235 from FP to FP. */
2236 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2238 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2239 from integer to FP. */
2242 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2243 with a subsequent conditional jump instruction into a single
2244 compare-and-branch uop. */
2247 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2248 will impact LEA instruction selection. */
2251 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2252 instructions. */
2255 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2256 at -O3. For the moment, the prefetching seems badly tuned for Intel
2257 chips. */
2258 m_K6_GEODE | m_AMD_MULTIPLE,
2260 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2261 the auto-vectorizer. */
2264 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2265 during reassociation of integer computation. */
2268 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2269 during reassociation of fp computation. */
2272 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2273 regs instead of memory. */
2274 m_COREI7 | m_CORE2I7
2277 /* Feature tests against the various architecture variations. */
2278 unsigned char ix86_arch_features[X86_ARCH_LAST];
2280 /* Feature tests against the various architecture variations, used to create
2281 ix86_arch_features based on the processor mask. */
2282 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2283 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2284 ~(m_386 | m_486 | m_PENT | m_K6),
2286 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2289 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2292 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2295 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2299 static const unsigned int x86_accumulate_outgoing_args
2300 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2302 static const unsigned int x86_arch_always_fancy_math_387
2303 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2305 static const unsigned int x86_avx256_split_unaligned_load
2306 = m_COREI7 | m_GENERIC;
2308 static const unsigned int x86_avx256_split_unaligned_store
2309 = m_COREI7 | m_BDVER | m_GENERIC;
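/* Illustrative sketch (assumed): unlike the tune feature array, these
   standalone masks are tested individually against the -mtune bit during
   option override and turn on the corresponding target flag, e.g.

     if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD)
         && (x86_avx256_split_unaligned_load & ix86_tune_mask))
       target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
*/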
2311 /* In case the average insn count for a single function invocation is
2312 lower than this constant, emit fast (but longer) prologue and
2313 epilogue code. */
2314 #define FAST_PROLOGUE_INSN_COUNT 20
2316 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2317 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2318 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2319 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2321 /* Array of the smallest class containing reg number REGNO, indexed by
2322 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2324 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2326 /* ax, dx, cx, bx */
2327 AREG, DREG, CREG, BREG,
2328 /* si, di, bp, sp */
2329 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2331 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2332 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2335 /* flags, fpsr, fpcr, frame */
2336 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2338 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2341 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2344 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2345 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2346 /* SSE REX registers */
2347 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2351 /* The "default" register map used in 32bit mode. */
2353 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2355 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2356 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2357 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2358 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2359 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2360 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2361 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2364 /* The "default" register map used in 64bit mode. */
2366 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2368 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2369 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2370 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2371 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2372 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2373 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2374 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2377 /* Define the register numbers to be used in Dwarf debugging information.
2378 The SVR4 reference port C compiler uses the following register numbers
2379 in its Dwarf output code:
2380 0 for %eax (gcc regno = 0)
2381 1 for %ecx (gcc regno = 2)
2382 2 for %edx (gcc regno = 1)
2383 3 for %ebx (gcc regno = 3)
2384 4 for %esp (gcc regno = 7)
2385 5 for %ebp (gcc regno = 6)
2386 6 for %esi (gcc regno = 4)
2387 7 for %edi (gcc regno = 5)
2388 The following three DWARF register numbers are never generated by
2389 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2390 believes these numbers have these meanings.
2391 8 for %eip (no gcc equivalent)
2392 9 for %eflags (gcc regno = 17)
2393 10 for %trapno (no gcc equivalent)
2394 It is not at all clear how we should number the FP stack registers
2395 for the x86 architecture. If the version of SDB on x86/svr4 were
2396 a bit less brain dead with respect to floating-point then we would
2397 have a precedent to follow with respect to DWARF register numbers
2398 for x86 FP registers, but the SDB on x86/svr4 is so completely
2399 broken with respect to FP registers that it is hardly worth thinking
2400 of it as something to strive for compatibility with.
2401 The version of x86/svr4 SDB I have at the moment does (partially)
2402 seem to believe that DWARF register number 11 is associated with
2403 the x86 register %st(0), but that's about all. Higher DWARF
2404 register numbers don't seem to be associated with anything in
2405 particular, and even for DWARF regno 11, SDB only seems to under-
2406 stand that it should say that a variable lives in %st(0) (when
2407 asked via an `=' command) if we said it was in DWARF regno 11,
2408 but SDB still prints garbage when asked for the value of the
2409 variable in question (via a `/' command).
2410 (Also note that the labels SDB prints for various FP stack regs
2411 when doing an `x' command are all wrong.)
2412 Note that these problems generally don't affect the native SVR4
2413 C compiler because it doesn't allow the use of -O with -g and
2414 because when it is *not* optimizing, it allocates a memory
2415 location for each floating-point variable, and the memory
2416 location is what gets described in the DWARF AT_location
2417 attribute for the variable in question.
2418 Regardless of the severe mental illness of the x86/svr4 SDB, we
2419 do something sensible here and we use the following DWARF
2420 register numbers. Note that these are all stack-top-relative
2421 numbers:
2422 11 for %st(0) (gcc regno = 8)
2423 12 for %st(1) (gcc regno = 9)
2424 13 for %st(2) (gcc regno = 10)
2425 14 for %st(3) (gcc regno = 11)
2426 15 for %st(4) (gcc regno = 12)
2427 16 for %st(5) (gcc regno = 13)
2428 17 for %st(6) (gcc regno = 14)
2429 18 for %st(7) (gcc regno = 15)
2431 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2433 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2434 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2435 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2436 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2437 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2438 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2439 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
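/* Usage note (a sketch; the real macro lives in i386.h): debug output
   picks between the two default maps based on the target word size,
   roughly

     #define DBX_REGISTER_NUMBER(N) \
       (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)])

   with svr4_dbx_register_map substituted on targets that use the SVR4
   numbering above.  */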
2442 /* Define parameter passing and return registers. */
2444 static int const x86_64_int_parameter_registers[6] =
2446 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2449 static int const x86_64_ms_abi_int_parameter_registers[4] =
2451 CX_REG, DX_REG, R8_REG, R9_REG
2454 static int const x86_64_int_return_registers[4] =
2456 AX_REG, DX_REG, DI_REG, SI_REG
2459 /* Define the structure for the machine field in struct function. */
2461 struct GTY(()) stack_local_entry {
2462 unsigned short mode;
2465 struct stack_local_entry *next;
2468 /* Structure describing stack frame layout.
2469 Stack grows downward. Between ARG_POINTER and STACK_POINTER the
2470 frame holds, in this order: the saved static chain (if
2471 ix86_static_chain_on_stack), the saved frame pointer (if
2472 frame_pointer_needed, marking HARD_FRAME_POINTER), the saved
2473 integer and SSE registers (the latter at sse_regs_save_offset),
2474 then the [va_arg registers] area, the local frame, and [padding2];
2475 this last bracketed region sums to to_allocate. */
2500 int outgoing_arguments_size;
2502 /* The offsets relative to ARG_POINTER. */
2503 HOST_WIDE_INT frame_pointer_offset;
2504 HOST_WIDE_INT hard_frame_pointer_offset;
2505 HOST_WIDE_INT stack_pointer_offset;
2506 HOST_WIDE_INT hfp_save_offset;
2507 HOST_WIDE_INT reg_save_offset;
2508 HOST_WIDE_INT sse_reg_save_offset;
2510 /* When save_regs_using_mov is set, emit prologue using
2511 move instead of push instructions. */
2512 bool save_regs_using_mov;
2515 /* Which cpu are we scheduling for. */
2516 enum attr_cpu ix86_schedule;
2518 /* Which cpu are we optimizing for. */
2519 enum processor_type ix86_tune;
2521 /* Which instruction set architecture to use. */
2522 enum processor_type ix86_arch;
2524 /* True if processor has SSE prefetch instruction. */
2525 unsigned char x86_prefetch_sse;
2527 /* -mstackrealign option */
2528 static const char ix86_force_align_arg_pointer_string[]
2529 = "force_align_arg_pointer";
2531 static rtx (*ix86_gen_leave) (void);
2532 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2533 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2534 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2535 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2536 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2537 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2538 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2539 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2540 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2541 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2542 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2544 /* Preferred alignment for stack boundary in bits. */
2545 unsigned int ix86_preferred_stack_boundary;
2547 /* Alignment for incoming stack boundary in bits, as specified on
2548 the command line. */
2549 static unsigned int ix86_user_incoming_stack_boundary;
2551 /* Default alignment for incoming stack boundary in bits. */
2552 static unsigned int ix86_default_incoming_stack_boundary;
2554 /* Alignment for incoming stack boundary in bits. */
2555 unsigned int ix86_incoming_stack_boundary;
2557 /* Calling abi specific va_list type nodes. */
2558 static GTY(()) tree sysv_va_list_type_node;
2559 static GTY(()) tree ms_va_list_type_node;
2561 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2562 char internal_label_prefix[16];
2563 int internal_label_prefix_len;
2565 /* Fence to use after loop using movnt. */
2568 /* Register class used for passing a given 64bit part of the argument.
2569 These represent classes as documented by the PS ABI, with the exception
2570 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2571 just uses SFmode or DFmode moves instead of DImode to avoid reformatting
2573 penalties. Similarly we play games with INTEGERSI_CLASS to use cheaper
2574 SImode moves whenever possible (the upper half does contain padding). */
2575 enum x86_64_reg_class
2578 X86_64_INTEGER_CLASS,
2579 X86_64_INTEGERSI_CLASS,
2586 X86_64_COMPLEX_X87_CLASS,
2590 #define MAX_CLASSES 4
2592 /* Table of constants used by fldpi, fldln2, etc. */
2593 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2594 static bool ext_80387_constants_init = 0;
2597 static struct machine_function * ix86_init_machine_status (void);
2598 static rtx ix86_function_value (const_tree, const_tree, bool);
2599 static bool ix86_function_value_regno_p (const unsigned int);
2600 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2602 static rtx ix86_static_chain (const_tree, bool);
2603 static int ix86_function_regparm (const_tree, const_tree);
2604 static void ix86_compute_frame_layout (struct ix86_frame *);
2605 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2607 static void ix86_add_new_builtins (HOST_WIDE_INT);
2608 static tree ix86_canonical_va_list_type (tree);
2609 static void predict_jump (int);
2610 static unsigned int split_stack_prologue_scratch_regno (void);
2611 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2613 enum ix86_function_specific_strings
2615 IX86_FUNCTION_SPECIFIC_ARCH,
2616 IX86_FUNCTION_SPECIFIC_TUNE,
2617 IX86_FUNCTION_SPECIFIC_MAX
2620 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2621 const char *, enum fpmath_unit, bool);
2622 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2623 static void ix86_function_specific_save (struct cl_target_option *);
2624 static void ix86_function_specific_restore (struct cl_target_option *);
2625 static void ix86_function_specific_print (FILE *, int,
2626 struct cl_target_option *);
2627 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2628 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2629 struct gcc_options *);
2630 static bool ix86_can_inline_p (tree, tree);
2631 static void ix86_set_current_function (tree);
2632 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2634 static enum calling_abi ix86_function_abi (const_tree);
2637 #ifndef SUBTARGET32_DEFAULT_CPU
2638 #define SUBTARGET32_DEFAULT_CPU "i386"
2641 /* The svr4 ABI for the i386 says that records and unions are returned
2642 in memory. */
2643 #ifndef DEFAULT_PCC_STRUCT_RETURN
2644 #define DEFAULT_PCC_STRUCT_RETURN 1
2647 /* Whether -mtune= or -march= were specified */
2648 static int ix86_tune_defaulted;
2649 static int ix86_arch_specified;
2651 /* Vectorization library interface and handlers. */
2652 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2654 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2655 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2657 /* Processor target table, indexed by processor number */
2660 const struct processor_costs *cost; /* Processor costs */
2661 const int align_loop; /* Default alignments. */
2662 const int align_loop_max_skip;
2663 const int align_jump;
2664 const int align_jump_max_skip;
2665 const int align_func;
2668 static const struct ptt processor_target_table[PROCESSOR_max] =
2670 {&i386_cost, 4, 3, 4, 3, 4},
2671 {&i486_cost, 16, 15, 16, 15, 16},
2672 {&pentium_cost, 16, 7, 16, 7, 16},
2673 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2674 {&geode_cost, 0, 0, 0, 0, 0},
2675 {&k6_cost, 32, 7, 32, 7, 32},
2676 {&athlon_cost, 16, 7, 16, 7, 16},
2677 {&pentium4_cost, 0, 0, 0, 0, 0},
2678 {&k8_cost, 16, 7, 16, 7, 16},
2679 {&nocona_cost, 0, 0, 0, 0, 0},
2680 /* Core 2 32-bit. */
2681 {&generic32_cost, 16, 10, 16, 10, 16},
2682 /* Core 2 64-bit. */
2683 {&generic64_cost, 16, 10, 16, 10, 16},
2684 /* Core i7 32-bit. */
2685 {&generic32_cost, 16, 10, 16, 10, 16},
2686 /* Core i7 64-bit. */
2687 {&generic64_cost, 16, 10, 16, 10, 16},
2688 {&generic32_cost, 16, 7, 16, 7, 16},
2689 {&generic64_cost, 16, 10, 16, 10, 16},
2690 {&amdfam10_cost, 32, 24, 32, 7, 32},
2691 {&bdver1_cost, 32, 24, 32, 7, 32},
2692 {&bdver2_cost, 32, 24, 32, 7, 32},
2693 {&btver1_cost, 32, 24, 32, 7, 32},
2694 {&btver2_cost, 32, 24, 32, 7, 32},
2695 {&atom_cost, 16, 15, 16, 7, 16}
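/* Illustrative sketch (assumed): the alignment columns above seed the
   generic alignment knobs when the user did not set them explicitly,
   along the lines of

     if (align_loops == 0)
       {
         align_loops = processor_target_table[ix86_tune].align_loop;
         align_loops_max_skip
           = processor_target_table[ix86_tune].align_loop_max_skip;
       }
*/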
2698 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2729 /* Return true if a red-zone is in use. */
2731 static inline bool
2732 ix86_using_red_zone (void)
2734 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
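/* Background: the red zone is the 128-byte area below the stack pointer
   that the SysV x86-64 ABI guarantees is not clobbered asynchronously
   (e.g. by signal handlers), so leaf functions may use it without
   adjusting %rsp.  The MS ABI makes no such guarantee, hence the
   exclusion above.  */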
2737 /* Return a string that documents the current -m options. The caller is
2738 responsible for freeing the string. */
2740 static char *
2741 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2742 const char *tune, enum fpmath_unit fpmath,
2745 struct ix86_target_opts
2747 const char *option; /* option string */
2748 HOST_WIDE_INT mask; /* isa mask options */
2751 /* This table is ordered so that options like -msse4.2 that imply
2752 preceding options are matched first. */
2753 static struct ix86_target_opts isa_opts[] =
2755 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2756 { "-mfma", OPTION_MASK_ISA_FMA },
2757 { "-mxop", OPTION_MASK_ISA_XOP },
2758 { "-mlwp", OPTION_MASK_ISA_LWP },
2759 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2760 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2761 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2762 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2763 { "-msse3", OPTION_MASK_ISA_SSE3 },
2764 { "-msse2", OPTION_MASK_ISA_SSE2 },
2765 { "-msse", OPTION_MASK_ISA_SSE },
2766 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2767 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2768 { "-mmmx", OPTION_MASK_ISA_MMX },
2769 { "-mabm", OPTION_MASK_ISA_ABM },
2770 { "-mbmi", OPTION_MASK_ISA_BMI },
2771 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2772 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2773 { "-mhle", OPTION_MASK_ISA_HLE },
2774 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2775 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2776 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2777 { "-madx", OPTION_MASK_ISA_ADX },
2778 { "-mtbm", OPTION_MASK_ISA_TBM },
2779 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2780 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2781 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2782 { "-maes", OPTION_MASK_ISA_AES },
2783 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2784 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2785 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2786 { "-mf16c", OPTION_MASK_ISA_F16C },
2787 { "-mrtm", OPTION_MASK_ISA_RTM },
2788 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2789 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2793 static struct ix86_target_opts flag_opts[] =
2795 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2796 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2797 { "-m80387", MASK_80387 },
2798 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2799 { "-malign-double", MASK_ALIGN_DOUBLE },
2800 { "-mcld", MASK_CLD },
2801 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2802 { "-mieee-fp", MASK_IEEE_FP },
2803 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2804 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2805 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2806 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2807 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2808 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2809 { "-mno-red-zone", MASK_NO_RED_ZONE },
2810 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2811 { "-mrecip", MASK_RECIP },
2812 { "-mrtd", MASK_RTD },
2813 { "-msseregparm", MASK_SSEREGPARM },
2814 { "-mstack-arg-probe", MASK_STACK_PROBE },
2815 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2816 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2817 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2818 { "-mvzeroupper", MASK_VZEROUPPER },
2819 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2820 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2821 { "-mprefer-avx128", MASK_PREFER_AVX128},
2824 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2827 char target_other[40];
2837 memset (opts, '\0', sizeof (opts));
2839 /* Add -march= option. */
2842 opts[num][0] = "-march=";
2843 opts[num++][1] = arch;
2846 /* Add -mtune= option. */
2849 opts[num][0] = "-mtune=";
2850 opts[num++][1] = tune;
2853 /* Add -m32/-m64/-mx32. */
2854 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2856 if ((isa & OPTION_MASK_ABI_64) != 0)
2860 isa &= ~ (OPTION_MASK_ISA_64BIT
2861 | OPTION_MASK_ABI_64
2862 | OPTION_MASK_ABI_X32);
2866 opts[num++][0] = abi;
2868 /* Pick out the ISA options that are set. */
2869 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2871 if ((isa & isa_opts[i].mask) != 0)
2873 opts[num++][0] = isa_opts[i].option;
2874 isa &= ~ isa_opts[i].mask;
2878 if (isa && add_nl_p)
2880 opts[num++][0] = isa_other;
2881 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2885 /* Add flag options. */
2886 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2888 if ((flags & flag_opts[i].mask) != 0)
2890 opts[num++][0] = flag_opts[i].option;
2891 flags &= ~ flag_opts[i].mask;
2895 if (flags && add_nl_p)
2897 opts[num++][0] = target_other;
2898 sprintf (target_other, "(other flags: %#x)", flags);
2901 /* Add -fpmath= option. */
2904 opts[num][0] = "-mfpmath=";
2905 switch ((int) fpmath)
2908 opts[num++][1] = "387";
2912 opts[num++][1] = "sse";
2915 case FPMATH_387 | FPMATH_SSE:
2916 opts[num++][1] = "sse+387";
2928 gcc_assert (num < ARRAY_SIZE (opts));
2930 /* Size the string. */
2932 sep_len = (add_nl_p) ? 3 : 1;
2933 for (i = 0; i < num; i++)
2936 for (j = 0; j < 2; j++)
2938 len += strlen (opts[i][j]);
2941 /* Build the string. */
2942 ret = ptr = (char *) xmalloc (len);
2945 for (i = 0; i < num; i++)
2949 for (j = 0; j < 2; j++)
2950 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2957 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2965 for (j = 0; j < 2; j++)
2968 memcpy (ptr, opts[i][j], len2[j]);
2970 line_len += len2[j];
2975 gcc_assert (ret + len >= ptr);
2980 /* Return true if profiling code should be emitted before the
2981 prologue, and false otherwise.
2982 Note: for x86 with "hotfix", sorry () is issued. */
2983 static bool
2984 ix86_profile_before_prologue (void)
2986 return flag_fentry != 0;
2989 /* Function that is callable from the debugger to print the current
2990 options. */
2991 static void
2992 ix86_debug_options (void)
2994 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2995 ix86_arch_string, ix86_tune_string,
3000 fprintf (stderr, "%s\n\n", opts);
3004 fputs ("<no options>\n\n", stderr);
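/* Usage example (a debugger-session sketch; the exact output depends on
   the active option set):

     (gdb) call ix86_debug_options ()
     -m64 -mtune=generic -march=x86-64 ...
*/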
3009 /* Override various settings based on options. If MAIN_ARGS_P, the
3010 options are from the command line, otherwise they are from
3011 attribute(target). */
3013 static void
3014 ix86_option_override_internal (bool main_args_p)
3017 unsigned int ix86_arch_mask, ix86_tune_mask;
3018 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3023 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3024 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3025 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3026 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3027 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3028 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3029 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3030 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3031 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3032 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3033 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3034 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3035 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3036 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3037 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3038 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3039 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3040 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3041 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3042 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3043 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3044 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3045 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3046 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3047 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3048 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3049 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3050 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3051 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3052 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3053 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3054 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3055 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3056 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3057 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3058 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3059 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3060 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3061 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3062 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3064 /* if this reaches 64, need to widen struct pta flags below */
3068 const char *const name; /* processor name or nickname. */
3069 const enum processor_type processor;
3070 const enum attr_cpu schedule;
3071 const unsigned HOST_WIDE_INT flags;
3073 const processor_alias_table[] =
3075 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3076 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3077 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3078 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3079 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3080 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3081 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3082 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3083 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3084 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3085 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3086 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3087 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3088 PTA_MMX | PTA_SSE | PTA_FXSR},
3089 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3090 PTA_MMX | PTA_SSE | PTA_FXSR},
3091 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3092 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3093 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3094 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3095 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3096 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3097 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3098 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3099 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3100 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3101 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3102 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3103 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3104 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR},
3105 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3106 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3107 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_FXSR},
3108 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3109 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3110 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3111 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL
3112 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3113 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
3114 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3115 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3116 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
3117 | PTA_RDRND | PTA_F16C | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3118 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
3119 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3120 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
3121 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
3122 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
3123 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE | PTA_FXSR | PTA_XSAVE
3125 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3126 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3127 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR},
3128 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3129 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3130 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3131 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3132 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3133 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3134 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3135 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3136 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3137 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3138 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3139 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3140 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3141 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3142 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3143 {"x86-64", PROCESSOR_K8, CPU_K8,
3144 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3145 {"k8", PROCESSOR_K8, CPU_K8,
3146 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3147 | PTA_SSE2 | PTA_NO_SAHF},
3148 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3149 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3150 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3151 {"opteron", PROCESSOR_K8, CPU_K8,
3152 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3153 | PTA_SSE2 | PTA_NO_SAHF},
3154 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3155 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3156 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3157 {"athlon64", PROCESSOR_K8, CPU_K8,
3158 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3159 | PTA_SSE2 | PTA_NO_SAHF},
3160 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3161 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3162 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3163 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3164 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3165 | PTA_SSE2 | PTA_NO_SAHF},
3166 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3167 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3168 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3169 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3170 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3171 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3172 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3173 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3174 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3175 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3176 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3178 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3179 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3180 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3181 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3182 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3183 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3184 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3185 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3186 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3187 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3188 {"btver2", PROCESSOR_BTVER2, CPU_GENERIC64,
3189 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3190 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3191 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3192 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3193 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3195 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3196 PTA_HLE /* flags are only used for -march switch. */ },
3197 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3199 | PTA_HLE /* flags are only used for -march switch. */ },
3202 /* -mrecip options. */
3205 const char *string; /* option name */
3206 unsigned int mask; /* mask bits to set */
3208 const recip_options[] =
3210 { "all", RECIP_MASK_ALL },
3211 { "none", RECIP_MASK_NONE },
3212 { "div", RECIP_MASK_DIV },
3213 { "sqrt", RECIP_MASK_SQRT },
3214 { "vec-div", RECIP_MASK_VEC_DIV },
3215 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3218 int const pta_size = ARRAY_SIZE (processor_alias_table);
3220 /* Set up prefix/suffix so the error messages refer to either the command
3221 line argument, or the attribute(target). */
3230 prefix = "option(\"";
3235 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3236 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3237 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT)
3238 ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3239 #ifdef TARGET_BI_ARCH
3242 #if TARGET_BI_ARCH == 1
3243 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3244 is on and OPTION_MASK_ABI_X32 is off. We turn off
3245 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3248 ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3250 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3251 on and OPTION_MASK_ABI_64 is off. We turn off
3252 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3255 ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3262 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3263 OPTION_MASK_ABI_64 for TARGET_X32. */
3264 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3265 ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3267 else if (TARGET_LP64)
3269 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3270 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3271 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3272 ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3275 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3276 SUBTARGET_OVERRIDE_OPTIONS;
3279 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3280 SUBSUBTARGET_OVERRIDE_OPTIONS;
3283 /* -fPIC is the default for x86_64. */
3284 if (TARGET_MACHO && TARGET_64BIT)
3287 /* Need to check -mtune=generic first. */
3288 if (ix86_tune_string)
3290 if (!strcmp (ix86_tune_string, "generic")
3291 || !strcmp (ix86_tune_string, "i686")
3292 /* As special support for cross compilers we read -mtune=native
3293 as -mtune=generic. With native compilers we won't see
3294 -mtune=native, as it was changed by the driver. */
3295 || !strcmp (ix86_tune_string, "native"))
3298 ix86_tune_string = "generic64";
3300 ix86_tune_string = "generic32";
3302 /* If this call is for setting the option attribute, allow the
3303 generic32/generic64 that was previously set. */
3304 else if (!main_args_p
3305 && (!strcmp (ix86_tune_string, "generic32")
3306 || !strcmp (ix86_tune_string, "generic64")))
3308 else if (!strncmp (ix86_tune_string, "generic", 7))
3309 error ("bad value (%s) for %stune=%s %s",
3310 ix86_tune_string, prefix, suffix, sw);
3311 else if (!strcmp (ix86_tune_string, "x86-64"))
3312 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3313 "%stune=k8%s or %stune=generic%s instead as appropriate",
3314 prefix, suffix, prefix, suffix, prefix, suffix);
3318 if (ix86_arch_string)
3319 ix86_tune_string = ix86_arch_string;
3320 if (!ix86_tune_string)
3322 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3323 ix86_tune_defaulted = 1;
3326 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3327 need to use a sensible tune option. */
3328 if (!strcmp (ix86_tune_string, "generic")
3329 || !strcmp (ix86_tune_string, "x86-64")
3330 || !strcmp (ix86_tune_string, "i686"))
3333 ix86_tune_string = "generic64";
3335 ix86_tune_string = "generic32";
3339 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3341 /* rep; movq isn't available in 32-bit code. */
3342 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3343 ix86_stringop_alg = no_stringop;
3346 if (!ix86_arch_string)
3347 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3349 ix86_arch_specified = 1;
3351 if (global_options_set.x_ix86_pmode)
3353 if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
3354 || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
3355 error ("address mode %qs not supported in the %s bit mode",
3356 TARGET_64BIT ? "short" : "long",
3357 TARGET_64BIT ? "64" : "32");
3360 ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;
3362 if (!global_options_set.x_ix86_abi)
3363 ix86_abi = DEFAULT_ABI;
3365 if (global_options_set.x_ix86_cmodel)
3367 switch (ix86_cmodel)
3372 ix86_cmodel = CM_SMALL_PIC;
3374 error ("code model %qs not supported in the %s bit mode",
3381 ix86_cmodel = CM_MEDIUM_PIC;
3383 error ("code model %qs not supported in the %s bit mode",
3385 else if (TARGET_X32)
3386 error ("code model %qs not supported in x32 mode",
3393 ix86_cmodel = CM_LARGE_PIC;
3395 error ("code model %qs not supported in the %s bit mode",
3397 else if (TARGET_X32)
3398 error ("code model %qs not supported in x32 mode",
3404 error ("code model %s does not support PIC mode", "32");
3406 error ("code model %qs not supported in the %s bit mode",
3413 error ("code model %s does not support PIC mode", "kernel");
3414 ix86_cmodel = CM_32;
3417 error ("code model %qs not supported in the %s bit mode",
3427 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3428 use of rip-relative addressing. This eliminates fixups that
3429 would otherwise be needed if this object is to be placed in a
3430 DLL, and is essentially just as efficient as direct addressing. */
3431 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3432 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3433 else if (TARGET_64BIT)
3434 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3436 ix86_cmodel = CM_32;
3438 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3440 error ("-masm=intel not supported in this configuration");
3441 ix86_asm_dialect = ASM_ATT;
3443 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3444 sorry ("%i-bit mode not compiled in",
3445 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3447 for (i = 0; i < pta_size; i++)
3448 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3450 ix86_schedule = processor_alias_table[i].schedule;
3451 ix86_arch = processor_alias_table[i].processor;
3452 /* Default cpu tuning to the architecture. */
3453 ix86_tune = ix86_arch;
3455 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3456 error ("CPU you selected does not support x86-64 "
3459 if (processor_alias_table[i].flags & PTA_MMX
3460 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3461 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3462 if (processor_alias_table[i].flags & PTA_3DNOW
3463 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3464 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3465 if (processor_alias_table[i].flags & PTA_3DNOW_A
3466 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3467 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3468 if (processor_alias_table[i].flags & PTA_SSE
3469 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3470 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3471 if (processor_alias_table[i].flags & PTA_SSE2
3472 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3473 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3474 if (processor_alias_table[i].flags & PTA_SSE3
3475 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3476 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3477 if (processor_alias_table[i].flags & PTA_SSSE3
3478 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3479 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3480 if (processor_alias_table[i].flags & PTA_SSE4_1
3481 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3482 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3483 if (processor_alias_table[i].flags & PTA_SSE4_2
3484 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3485 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3486 if (processor_alias_table[i].flags & PTA_AVX
3487 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3488 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3489 if (processor_alias_table[i].flags & PTA_AVX2
3490 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3491 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3492 if (processor_alias_table[i].flags & PTA_FMA
3493 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3494 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3495 if (processor_alias_table[i].flags & PTA_SSE4A
3496 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3497 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3498 if (processor_alias_table[i].flags & PTA_FMA4
3499 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3500 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3501 if (processor_alias_table[i].flags & PTA_XOP
3502 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3503 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3504 if (processor_alias_table[i].flags & PTA_LWP
3505 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3506 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3507 if (processor_alias_table[i].flags & PTA_ABM
3508 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3509 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3510 if (processor_alias_table[i].flags & PTA_BMI
3511 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3512 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3513 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3514 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3515 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3516 if (processor_alias_table[i].flags & PTA_TBM
3517 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3518 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3519 if (processor_alias_table[i].flags & PTA_BMI2
3520 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3521 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3522 if (processor_alias_table[i].flags & PTA_CX16
3523 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3524 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3525 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3526 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3527 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3528 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3529 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3530 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3531 if (processor_alias_table[i].flags & PTA_MOVBE
3532 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3533 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3534 if (processor_alias_table[i].flags & PTA_AES
3535 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3536 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3537 if (processor_alias_table[i].flags & PTA_PCLMUL
3538 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3539 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3540 if (processor_alias_table[i].flags & PTA_FSGSBASE
3541 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3542 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3543 if (processor_alias_table[i].flags & PTA_RDRND
3544 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3545 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3546 if (processor_alias_table[i].flags & PTA_F16C
3547 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3548 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3549 if (processor_alias_table[i].flags & PTA_RTM
3550 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3551 ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3552 if (processor_alias_table[i].flags & PTA_HLE
3553 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3554 ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3555 if (processor_alias_table[i].flags & PTA_PRFCHW
3556 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3557 ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3558 if (processor_alias_table[i].flags & PTA_RDSEED
3559 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3560 ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3561 if (processor_alias_table[i].flags & PTA_ADX
3562 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3563 ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3564 if (processor_alias_table[i].flags & PTA_FXSR
3565 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3566 ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3567 if (processor_alias_table[i].flags & PTA_XSAVE
3568 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3569 ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3570 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3571 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3572 ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3573 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3574 x86_prefetch_sse = true;
3579 if (!strcmp (ix86_arch_string, "generic"))
3580 error ("generic CPU can be used only for %stune=%s %s",
3581 prefix, suffix, sw);
3582 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3583 error ("bad value (%s) for %sarch=%s %s",
3584 ix86_arch_string, prefix, suffix, sw);
3586 ix86_arch_mask = 1u << ix86_arch;
3587 for (i = 0; i < X86_ARCH_LAST; ++i)
3588 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3590 for (i = 0; i < pta_size; i++)
3591 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3593 ix86_schedule = processor_alias_table[i].schedule;
3594 ix86_tune = processor_alias_table[i].processor;
3597 if (!(processor_alias_table[i].flags & PTA_64BIT))
3599 if (ix86_tune_defaulted)
3601 ix86_tune_string = "x86-64";
3602 for (i = 0; i < pta_size; i++)
3603 if (! strcmp (ix86_tune_string,
3604 processor_alias_table[i].name))
3606 ix86_schedule = processor_alias_table[i].schedule;
3607 ix86_tune = processor_alias_table[i].processor;
3610 error ("CPU you selected does not support x86-64 "
3616 /* Adjust tuning when compiling for 32-bit ABI. */
3619 case PROCESSOR_GENERIC64:
3620 ix86_tune = PROCESSOR_GENERIC32;
3621 ix86_schedule = CPU_PENTIUMPRO;
3624 case PROCESSOR_CORE2_64:
3625 ix86_tune = PROCESSOR_CORE2_32;
3628 case PROCESSOR_COREI7_64:
3629 ix86_tune = PROCESSOR_COREI7_32;
3636 /* Intel CPUs have always interpreted SSE prefetch instructions as
3637 NOPs; so, we can enable SSE prefetch instructions even when
3638 -mtune (rather than -march) points us to a processor that has them.
3639 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3640 higher processors. */
3642 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3643 x86_prefetch_sse = true;
3647 if (ix86_tune_specified && i == pta_size)
3648 error ("bad value (%s) for %stune=%s %s",
3649 ix86_tune_string, prefix, suffix, sw);
3651 ix86_tune_mask = 1u << ix86_tune;
3652 for (i = 0; i < X86_TUNE_LAST; ++i)
3653 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3655 #ifndef USE_IX86_FRAME_POINTER
3656 #define USE_IX86_FRAME_POINTER 0
3659 #ifndef USE_X86_64_FRAME_POINTER
3660 #define USE_X86_64_FRAME_POINTER 0
3663 /* Set the default values for switches whose default depends on TARGET_64BIT
3664 in case they weren't overwritten by command line options. */
3667 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3668 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3669 if (flag_asynchronous_unwind_tables == 2)
3670 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3671 if (flag_pcc_struct_return == 2)
3672 flag_pcc_struct_return = 0;
3676 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3677 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3678 if (flag_asynchronous_unwind_tables == 2)
3679 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3680 if (flag_pcc_struct_return == 2)
3681 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3684 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3686 ix86_cost = &ix86_size_cost;
3688 ix86_cost = ix86_tune_cost;
3690 /* Arrange to set up i386_stack_locals for all functions. */
3691 init_machine_status = ix86_init_machine_status;
3693 /* Validate -mregparm= value. */
3694 if (global_options_set.x_ix86_regparm)
3697 warning (0, "-mregparm is ignored in 64-bit mode");
3698 if (ix86_regparm > REGPARM_MAX)
3700 error ("-mregparm=%d is not between 0 and %d",
3701 ix86_regparm, REGPARM_MAX);
3706 ix86_regparm = REGPARM_MAX;
3708 /* Default align_* from the processor table. */
3709 if (align_loops == 0)
3711 align_loops = processor_target_table[ix86_tune].align_loop;
3712 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3714 if (align_jumps == 0)
3716 align_jumps = processor_target_table[ix86_tune].align_jump;
3717 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3719 if (align_functions == 0)
3721 align_functions = processor_target_table[ix86_tune].align_func;
3724 /* Provide default for -mbranch-cost= value. */
3725 if (!global_options_set.x_ix86_branch_cost)
3726 ix86_branch_cost = ix86_cost->branch_cost;
3730 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3732 /* Enable by default the SSE and MMX builtins. Do allow the user to
3733 explicitly disable any of these. In particular, disabling SSE and
3734 MMX for kernel code is extremely useful. */
3735 if (!ix86_arch_specified)
3737 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3738 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3741 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3745 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3747 if (!ix86_arch_specified)
3749 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3751 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3752 when the programmer takes care to keep the stack from being destroyed. */
3753 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3754 target_flags |= MASK_NO_RED_ZONE;
3757 /* Keep nonleaf frame pointers. */
3758 if (flag_omit_frame_pointer)
3759 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3760 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3761 flag_omit_frame_pointer = 1;
3763 /* If we're doing fast math, we don't care about comparison order
3764 wrt NaNs. This lets us use a shorter comparison sequence. */
3765 if (flag_finite_math_only)
3766 target_flags &= ~MASK_IEEE_FP;
3768 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3769 since the insns won't need emulation. */
3770 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3771 target_flags &= ~MASK_NO_FANCY_MATH_387;
3773 /* Likewise, if the target doesn't have a 387, or we've specified
3774 software floating point, don't use 387 inline intrinsics. */
3776 target_flags |= MASK_NO_FANCY_MATH_387;
3778 /* Turn on MMX builtins for -msse. */
3780 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3782 /* Enable SSE prefetch. */
3783 if (TARGET_SSE || TARGET_PRFCHW)
3784 x86_prefetch_sse = true;
3786 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3787 if (TARGET_SSE4_2 || TARGET_ABM)
3788 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3790 /* Turn on lzcnt instruction for -mabm. */
3792 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3794 /* Validate -mpreferred-stack-boundary= value or default it to
3795 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3796 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3797 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3799 int min = (TARGET_64BIT ? (TARGET_SSE ? 4 : 3) : 2);
3800 int max = (TARGET_SEH ? 4 : 12);
3802 if (ix86_preferred_stack_boundary_arg < min
3803 || ix86_preferred_stack_boundary_arg > max)
3806 error ("-mpreferred-stack-boundary is not supported "
3809 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3810 ix86_preferred_stack_boundary_arg, min, max);
3813 ix86_preferred_stack_boundary
3814 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3817 /* Set the default value for -mstackrealign. */
3818 if (ix86_force_align_arg_pointer == -1)
3819 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3821 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3823 /* Validate -mincoming-stack-boundary= value or default it to
3824 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3825 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3826 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3828 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3829 || ix86_incoming_stack_boundary_arg > 12)
3830 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3831 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3834 ix86_user_incoming_stack_boundary
3835 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3836 ix86_incoming_stack_boundary
3837 = ix86_user_incoming_stack_boundary;
3841 /* Accept -msseregparm only if at least SSE support is enabled. */
3842 if (TARGET_SSEREGPARM
3844 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3846 if (global_options_set.x_ix86_fpmath)
3848 if (ix86_fpmath & FPMATH_SSE)
3852 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3853 ix86_fpmath = FPMATH_387;
3855 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3857 warning (0, "387 instruction set disabled, using SSE arithmetics");
3858 ix86_fpmath = FPMATH_SSE;
3863 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3865 /* If the i387 is disabled, then do not return values in it. */
3867 target_flags &= ~MASK_FLOAT_RETURNS;
3869 /* Use external vectorized library in vectorizing intrinsics. */
3870 if (global_options_set.x_ix86_veclibabi_type)
3871 switch (ix86_veclibabi_type)
3873 case ix86_veclibabi_type_svml:
3874 ix86_veclib_handler = ix86_veclibabi_svml;
3877 case ix86_veclibabi_type_acml:
3878 ix86_veclib_handler = ix86_veclibabi_acml;
3885 if ((!USE_IX86_FRAME_POINTER
3886 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3887 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3889 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3891 /* ??? Unwind info is not correct around the CFG unless either a frame
3892 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3893 unwind info generation to be aware of the CFG and propagating states around edges. */
3895 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3896 || flag_exceptions || flag_non_call_exceptions)
3897 && flag_omit_frame_pointer
3898 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3900 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3901 warning (0, "unwind tables currently require either a frame pointer "
3902 "or %saccumulate-outgoing-args%s for correctness",
3904 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3907 /* If stack probes are required, the space used for large function
3908 arguments on the stack must also be probed, so enable
3909 -maccumulate-outgoing-args so this happens in the prologue. */
3910 if (TARGET_STACK_PROBE
3911 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3913 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3914 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3915 "for correctness", prefix, suffix);
3916 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3919 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3922 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3923 p = strchr (internal_label_prefix, 'X');
3924 internal_label_prefix_len = p - internal_label_prefix;
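/* For illustration (a sketch, with a hypothetical label string): on
   ELF targets ASM_GENERATE_INTERNAL_LABEL typically builds ".LX0",
   so the prefix computed above is ".L" with length 2.  */
#if 0
#include <string.h>
static size_t
label_prefix_len (const char *label)  /* e.g. ".LX0" */
{
  const char *x = strchr (label, 'X');
  return x ? (size_t) (x - label) : 0;  /* 2 for ".LX0" */
}
#endif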
3928 /* When no scheduling description is available, disable the scheduler pass
3929 so it won't slow down compilation and make x87 code slower. */
3930 if (!TARGET_SCHEDULE)
3931 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3933 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3934 ix86_tune_cost->simultaneous_prefetches,
3935 global_options.x_param_values,
3936 global_options_set.x_param_values);
3937 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3938 ix86_tune_cost->prefetch_block,
3939 global_options.x_param_values,
3940 global_options_set.x_param_values);
3941 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3942 ix86_tune_cost->l1_cache_size,
3943 global_options.x_param_values,
3944 global_options_set.x_param_values);
3945 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
3946 ix86_tune_cost->l2_cache_size,
3947 global_options.x_param_values,
3948 global_options_set.x_param_values);
3950 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
3951 if (flag_prefetch_loop_arrays < 0
3954 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3955 flag_prefetch_loop_arrays = 1;
3957 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3958 can be optimized to ap = __builtin_next_arg (0). */
3959 if (!TARGET_64BIT && !flag_split_stack)
3960 targetm.expand_builtin_va_start = NULL;
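/* For illustration (a hypothetical variadic function): with a plain
   `char *' va_list, va_start only needs the address just past the
   last named argument, which is what the __builtin_next_arg
   optimization mentioned above computes.  */
#if 0
#include <stdarg.h>
static int
sum_ints (int count, ...)
{
  va_list ap;
  int i, total = 0;
  va_start (ap, count);  /* may be lowered to __builtin_next_arg */
  for (i = 0; i < count; i++)
    total += va_arg (ap, int);
  va_end (ap);
  return total;
}
#endif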
3964 ix86_gen_leave = gen_leave_rex64;
3965 if (Pmode == DImode)
3967 ix86_gen_monitor = gen_sse3_monitor64_di;
3968 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
3969 ix86_gen_tls_local_dynamic_base_64
3970 = gen_tls_local_dynamic_base_64_di;
3974 ix86_gen_monitor = gen_sse3_monitor64_si;
3975 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
3976 ix86_gen_tls_local_dynamic_base_64
3977 = gen_tls_local_dynamic_base_64_si;
3982 ix86_gen_leave = gen_leave;
3983 ix86_gen_monitor = gen_sse3_monitor;
3986 if (Pmode == DImode)
3988 ix86_gen_add3 = gen_adddi3;
3989 ix86_gen_sub3 = gen_subdi3;
3990 ix86_gen_sub3_carry = gen_subdi3_carry;
3991 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3992 ix86_gen_andsp = gen_anddi3;
3993 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3994 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3995 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3999 ix86_gen_add3 = gen_addsi3;
4000 ix86_gen_sub3 = gen_subsi3;
4001 ix86_gen_sub3_carry = gen_subsi3_carry;
4002 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4003 ix86_gen_andsp = gen_andsi3;
4004 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4005 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4006 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4010 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4012 target_flags |= MASK_CLD & ~target_flags_explicit;
4015 if (!TARGET_64BIT && flag_pic)
4017 if (flag_fentry > 0)
4018 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4022 else if (TARGET_SEH)
4024 if (flag_fentry == 0)
4025 sorry ("-mno-fentry isn%'t compatible with SEH");
4028 else if (flag_fentry < 0)
4030 #if defined(PROFILE_BEFORE_PROLOGUE)
4039 /* When not optimizing for size, enable the vzeroupper optimization for
4040 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4041 AVX unaligned loads/stores. */
4044 if (flag_expensive_optimizations
4045 && !(target_flags_explicit & MASK_VZEROUPPER))
4046 target_flags |= MASK_VZEROUPPER;
4047 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
4048 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4049 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4050 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
4051 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4052 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4053 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
4054 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
4055 target_flags |= MASK_PREFER_AVX128;
4060 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4061 target_flags &= ~MASK_VZEROUPPER;
4064 if (ix86_recip_name)
4066 char *p = ASTRDUP (ix86_recip_name);
4068 unsigned int mask, i;
4071 while ((q = strtok (p, ",")) != NULL)
4082 if (!strcmp (q, "default"))
4083 mask = RECIP_MASK_ALL;
4086 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4087 if (!strcmp (q, recip_options[i].string))
4089 mask = recip_options[i].mask;
4093 if (i == ARRAY_SIZE (recip_options))
4095 error ("unknown option for -mrecip=%s", q);
4097 mask = RECIP_MASK_NONE;
4101 recip_mask_explicit |= mask;
4103 recip_mask &= ~mask;
4110 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
4111 else if (target_flags_explicit & MASK_RECIP)
4112 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
4114 /* Default long double to 64-bit for Bionic. */
4115 if (TARGET_HAS_BIONIC
4116 && !(target_flags_explicit & MASK_LONG_DOUBLE_64))
4117 target_flags |= MASK_LONG_DOUBLE_64;
4119 /* Save the initial options in case the user does function specific options. */
4122 target_option_default_node = target_option_current_node
4123 = build_target_option_node ();
4126 /* Return TRUE if VAL is passed in a register with a 256bit AVX mode. */
4129 function_pass_avx256_p (const_rtx val)
4134 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4137 if (GET_CODE (val) == PARALLEL)
4142 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4144 r = XVECEXP (val, 0, i);
4145 if (GET_CODE (r) == EXPR_LIST
4147 && REG_P (XEXP (r, 0))
4148 && (GET_MODE (XEXP (r, 0)) == OImode
4149 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4157 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4160 ix86_option_override (void)
4162 ix86_option_override_internal (true);
4165 /* Update register usage after having seen the compiler flags. */
4168 ix86_conditional_register_usage (void)
4173 /* The PIC register, if it exists, is fixed. */
4174 j = PIC_OFFSET_TABLE_REGNUM;
4175 if (j != INVALID_REGNUM)
4176 fixed_regs[j] = call_used_regs[j] = 1;
4178 /* For 32-bit targets, squash the REX registers. */
4181 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4182 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4183 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4184 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4187 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4188 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4189 : TARGET_64BIT ? (1 << 2)
4192 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4194 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4196 /* Set/reset conditionally defined registers from
4197 CALL_USED_REGISTERS initializer. */
4198 if (call_used_regs[i] > 1)
4199 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4201 /* Build the CLOBBERED_REGS register set from the call-used
4202 registers in the GENERAL_REGS register set. */
4203 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4204 && call_used_regs[i])
4205 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4208 /* If MMX is disabled, squash the registers. */
4210 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4211 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4212 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4214 /* If SSE is disabled, squash the registers. */
4216 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4217 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4218 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4220 /* If the FPU is disabled, squash the registers. */
4221 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4222 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4223 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4224 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4228 /* Save the current options */
4231 ix86_function_specific_save (struct cl_target_option *ptr)
4233 ptr->arch = ix86_arch;
4234 ptr->schedule = ix86_schedule;
4235 ptr->tune = ix86_tune;
4236 ptr->branch_cost = ix86_branch_cost;
4237 ptr->tune_defaulted = ix86_tune_defaulted;
4238 ptr->arch_specified = ix86_arch_specified;
4239 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4240 ptr->ix86_target_flags_explicit = target_flags_explicit;
4241 ptr->x_recip_mask_explicit = recip_mask_explicit;
4243 /* The fields are char but the variables are not; make sure the
4244 values fit in the fields. */
4245 gcc_assert (ptr->arch == ix86_arch);
4246 gcc_assert (ptr->schedule == ix86_schedule);
4247 gcc_assert (ptr->tune == ix86_tune);
4248 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4251 /* Restore the current options */
4254 ix86_function_specific_restore (struct cl_target_option *ptr)
4256 enum processor_type old_tune = ix86_tune;
4257 enum processor_type old_arch = ix86_arch;
4258 unsigned int ix86_arch_mask, ix86_tune_mask;
4261 ix86_arch = (enum processor_type) ptr->arch;
4262 ix86_schedule = (enum attr_cpu) ptr->schedule;
4263 ix86_tune = (enum processor_type) ptr->tune;
4264 ix86_branch_cost = ptr->branch_cost;
4265 ix86_tune_defaulted = ptr->tune_defaulted;
4266 ix86_arch_specified = ptr->arch_specified;
4267 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4268 target_flags_explicit = ptr->ix86_target_flags_explicit;
4269 recip_mask_explicit = ptr->x_recip_mask_explicit;
4271 /* Recreate the arch feature tests if the arch changed */
4272 if (old_arch != ix86_arch)
4274 ix86_arch_mask = 1u << ix86_arch;
4275 for (i = 0; i < X86_ARCH_LAST; ++i)
4276 ix86_arch_features[i]
4277 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4280 /* Recreate the tune optimization tests */
4281 if (old_tune != ix86_tune)
4283 ix86_tune_mask = 1u << ix86_tune;
4284 for (i = 0; i < X86_TUNE_LAST; ++i)
4285 ix86_tune_features[i]
4286 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4290 /* Print the current options */
4293 ix86_function_specific_print (FILE *file, int indent,
4294 struct cl_target_option *ptr)
4297 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4298 NULL, NULL, ptr->x_ix86_fpmath, false);
4300 fprintf (file, "%*sarch = %d (%s)\n",
4303 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4304 ? cpu_names[ptr->arch]
4307 fprintf (file, "%*stune = %d (%s)\n",
4310 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4311 ? cpu_names[ptr->tune]
4314 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4318 fprintf (file, "%*s%s\n", indent, "", target_string);
4319 free (target_string);
4324 /* Inner function to process the attribute((target(...))): take an argument and
4325 set the current options from the argument. If we have a list, recursively go over the list. */
4329 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4330 struct gcc_options *enum_opts_set)
4335 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4336 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4337 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4338 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4339 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4355 enum ix86_opt_type type;
4360 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4361 IX86_ATTR_ISA ("abm", OPT_mabm),
4362 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4363 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4364 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4365 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4366 IX86_ATTR_ISA ("aes", OPT_maes),
4367 IX86_ATTR_ISA ("avx", OPT_mavx),
4368 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4369 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4370 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4371 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4372 IX86_ATTR_ISA ("sse", OPT_msse),
4373 IX86_ATTR_ISA ("sse2", OPT_msse2),
4374 IX86_ATTR_ISA ("sse3", OPT_msse3),
4375 IX86_ATTR_ISA ("sse4", OPT_msse4),
4376 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4377 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4378 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4379 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4380 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4381 IX86_ATTR_ISA ("fma", OPT_mfma),
4382 IX86_ATTR_ISA ("xop", OPT_mxop),
4383 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4384 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4385 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4386 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4387 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4388 IX86_ATTR_ISA ("hle", OPT_mhle),
4389 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4390 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4391 IX86_ATTR_ISA ("adx", OPT_madx),
4392 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4393 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4394 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4397 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4399 /* string options */
4400 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4401 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4404 IX86_ATTR_YES ("cld",
4408 IX86_ATTR_NO ("fancy-math-387",
4409 OPT_mfancy_math_387,
4410 MASK_NO_FANCY_MATH_387),
4412 IX86_ATTR_YES ("ieee-fp",
4416 IX86_ATTR_YES ("inline-all-stringops",
4417 OPT_minline_all_stringops,
4418 MASK_INLINE_ALL_STRINGOPS),
4420 IX86_ATTR_YES ("inline-stringops-dynamically",
4421 OPT_minline_stringops_dynamically,
4422 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4424 IX86_ATTR_NO ("align-stringops",
4425 OPT_mno_align_stringops,
4426 MASK_NO_ALIGN_STRINGOPS),
4428 IX86_ATTR_YES ("recip",
4434 /* If this is a list, recurse to get the options. */
4435 if (TREE_CODE (args) == TREE_LIST)
4439 for (; args; args = TREE_CHAIN (args))
4440 if (TREE_VALUE (args)
4441 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4442 p_strings, enum_opts_set))
4448 else if (TREE_CODE (args) != STRING_CST)
4451 /* Handle multiple arguments separated by commas. */
4452 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4454 while (next_optstr && *next_optstr != '\0')
4456 char *p = next_optstr;
4458 char *comma = strchr (next_optstr, ',');
4459 const char *opt_string;
4460 size_t len, opt_len;
4465 enum ix86_opt_type type = ix86_opt_unknown;
4471 len = comma - next_optstr;
4472 next_optstr = comma + 1;
4480 /* Recognize no-xxx. */
4481 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4490 /* Find the option. */
4493 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4495 type = attrs[i].type;
4496 opt_len = attrs[i].len;
4497 if (ch == attrs[i].string[0]
4498 && ((type != ix86_opt_str && type != ix86_opt_enum)
4501 && memcmp (p, attrs[i].string, opt_len) == 0)
4504 mask = attrs[i].mask;
4505 opt_string = attrs[i].string;
4510 /* Process the option. */
4513 error ("attribute(target(\"%s\")) is unknown", orig_p);
4517 else if (type == ix86_opt_isa)
4519 struct cl_decoded_option decoded;
4521 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4522 ix86_handle_option (&global_options, &global_options_set,
4523 &decoded, input_location);
4526 else if (type == ix86_opt_yes || type == ix86_opt_no)
4528 if (type == ix86_opt_no)
4529 opt_set_p = !opt_set_p;
4532 target_flags |= mask;
4534 target_flags &= ~mask;
4537 else if (type == ix86_opt_str)
4541 error ("option(\"%s\") was already specified", opt_string);
4545 p_strings[opt] = xstrdup (p + opt_len);
4548 else if (type == ix86_opt_enum)
4553 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4555 set_option (&global_options, enum_opts_set, opt, value,
4556 p + opt_len, DK_UNSPECIFIED, input_location,
4560 error ("attribute(target(\"%s\")) is unknown", orig_p);
4572 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4575 ix86_valid_target_attribute_tree (tree args)
4577 const char *orig_arch_string = ix86_arch_string;
4578 const char *orig_tune_string = ix86_tune_string;
4579 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4580 int orig_tune_defaulted = ix86_tune_defaulted;
4581 int orig_arch_specified = ix86_arch_specified;
4582 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4585 struct cl_target_option *def
4586 = TREE_TARGET_OPTION (target_option_default_node);
4587 struct gcc_options enum_opts_set;
4589 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4591 /* Process each of the options on the chain. */
4592 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4596 /* If the changed options are different from the default, rerun
4597 ix86_option_override_internal, and then save the options away.
4598 The string options are attribute options, and will be undone
4599 when we copy the save structure. */
4600 if (ix86_isa_flags != def->x_ix86_isa_flags
4601 || target_flags != def->x_target_flags
4602 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4603 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4604 || enum_opts_set.x_ix86_fpmath)
4606 /* If we are using the default tune= or arch=, undo the string assigned,
4607 and use the default. */
4608 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4609 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4610 else if (!orig_arch_specified)
4611 ix86_arch_string = NULL;
4613 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4614 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4615 else if (orig_tune_defaulted)
4616 ix86_tune_string = NULL;
4618 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4619 if (enum_opts_set.x_ix86_fpmath)
4620 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4621 else if (!TARGET_64BIT && TARGET_SSE)
4623 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4624 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4627 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4628 ix86_option_override_internal (false);
4630 /* Add any builtin functions with the new ISA, if any. */
4631 ix86_add_new_builtins (ix86_isa_flags);
4633 /* Save the current options unless we are validating options for #pragma. */
4635 t = build_target_option_node ();
4637 ix86_arch_string = orig_arch_string;
4638 ix86_tune_string = orig_tune_string;
4639 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4641 /* Free up memory allocated to hold the strings */
4642 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4643 free (option_strings[i]);
4649 /* Hook to validate attribute((target("string"))). */
4652 ix86_valid_target_attribute_p (tree fndecl,
4653 tree ARG_UNUSED (name),
4655 int ARG_UNUSED (flags))
4657 struct cl_target_option cur_target;
4659 tree old_optimize = build_optimization_node ();
4660 tree new_target, new_optimize;
4661 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4663 /* If the function changed the optimization levels as well as setting target
4664 options, start with the optimizations specified. */
4665 if (func_optimize && func_optimize != old_optimize)
4666 cl_optimization_restore (&global_options,
4667 TREE_OPTIMIZATION (func_optimize));
4669 /* The target attributes may also change some optimization flags, so update
4670 the optimization options if necessary. */
4671 cl_target_option_save (&cur_target, &global_options);
4672 new_target = ix86_valid_target_attribute_tree (args);
4673 new_optimize = build_optimization_node ();
4680 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4682 if (old_optimize != new_optimize)
4683 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4686 cl_target_option_restore (&global_options, &cur_target);
4688 if (old_optimize != new_optimize)
4689 cl_optimization_restore (&global_options,
4690 TREE_OPTIMIZATION (old_optimize));
4696 /* Hook to determine if one function can safely inline another. */
4699 ix86_can_inline_p (tree caller, tree callee)
4702 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4703 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4705 /* If callee has no option attributes, then it is ok to inline. */
4709 /* If the caller has no option attributes, but the callee does, then it is not ok to inline. */
4711 else if (!caller_tree)
4716 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4717 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4719 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4720 function can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4722 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4723 != callee_opts->x_ix86_isa_flags)
4726 /* See if we have the same non-isa options. */
4727 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4730 /* See if arch, tune, etc. are the same. */
4731 else if (caller_opts->arch != callee_opts->arch)
4734 else if (caller_opts->tune != callee_opts->tune)
4737 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4740 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4751 /* Remember the last target of ix86_set_current_function. */
4752 static GTY(()) tree ix86_previous_fndecl;
4754 /* Establish appropriate back-end context for processing the function
4755 FNDECL. The argument might be NULL to indicate processing at top
4756 level, outside of any function scope. */
4758 ix86_set_current_function (tree fndecl)
4760 /* Only change the context if the function changes. This hook is called
4761 several times in the course of compiling a function, and we don't want to
4762 slow things down too much or call target_reinit when it isn't safe. */
4763 if (fndecl && fndecl != ix86_previous_fndecl)
4765 tree old_tree = (ix86_previous_fndecl
4766 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4769 tree new_tree = (fndecl
4770 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4773 ix86_previous_fndecl = fndecl;
4774 if (old_tree == new_tree)
4779 cl_target_option_restore (&global_options,
4780 TREE_TARGET_OPTION (new_tree));
4786 struct cl_target_option *def
4787 = TREE_TARGET_OPTION (target_option_current_node);
4789 cl_target_option_restore (&global_options, def);
4796 /* Return true if this goes in large data/bss. */
4799 ix86_in_large_data_p (tree exp)
4801 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4804 /* Functions are never large data. */
4805 if (TREE_CODE (exp) == FUNCTION_DECL)
4808 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4810 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4811 if (strcmp (section, ".ldata") == 0
4812 || strcmp (section, ".lbss") == 0)
4818 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4820 /* If this is an incomplete type with size 0, then we can't put it
4821 in data because it might be too big when completed. */
4822 if (!size || size > ix86_section_threshold)
4829 /* Switch to the appropriate section for output of DECL.
4830 DECL is either a `VAR_DECL' node or a constant of some sort.
4831 RELOC indicates whether forming the initial value of DECL requires
4832 link-time relocations. */
4834 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4838 x86_64_elf_select_section (tree decl, int reloc,
4839 unsigned HOST_WIDE_INT align)
4841 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4842 && ix86_in_large_data_p (decl))
4844 const char *sname = NULL;
4845 unsigned int flags = SECTION_WRITE;
4846 switch (categorize_decl_for_section (decl, reloc))
4851 case SECCAT_DATA_REL:
4852 sname = ".ldata.rel";
4854 case SECCAT_DATA_REL_LOCAL:
4855 sname = ".ldata.rel.local";
4857 case SECCAT_DATA_REL_RO:
4858 sname = ".ldata.rel.ro";
4860 case SECCAT_DATA_REL_RO_LOCAL:
4861 sname = ".ldata.rel.ro.local";
4865 flags |= SECTION_BSS;
4868 case SECCAT_RODATA_MERGE_STR:
4869 case SECCAT_RODATA_MERGE_STR_INIT:
4870 case SECCAT_RODATA_MERGE_CONST:
4874 case SECCAT_SRODATA:
4881 /* We don't split these for the medium model. Place them into
4882 default sections and hope for the best. */
4887 /* We might get called with string constants, but get_named_section
4888 doesn't like them as they are not DECLs. Also, we need to set
4889 flags in that case. */
4891 return get_section (sname, flags, NULL);
4892 return get_named_section (decl, sname, reloc);
4895 return default_elf_select_section (decl, reloc, align);
4898 /* Build up a unique section name, expressed as a
4899 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4900 RELOC indicates whether the initial value of EXP requires
4901 link-time relocations. */
4903 static void ATTRIBUTE_UNUSED
4904 x86_64_elf_unique_section (tree decl, int reloc)
4906 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4907 && ix86_in_large_data_p (decl))
4909 const char *prefix = NULL;
4910 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4911 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4913 switch (categorize_decl_for_section (decl, reloc))
4916 case SECCAT_DATA_REL:
4917 case SECCAT_DATA_REL_LOCAL:
4918 case SECCAT_DATA_REL_RO:
4919 case SECCAT_DATA_REL_RO_LOCAL:
4920 prefix = one_only ? ".ld" : ".ldata";
4923 prefix = one_only ? ".lb" : ".lbss";
4926 case SECCAT_RODATA_MERGE_STR:
4927 case SECCAT_RODATA_MERGE_STR_INIT:
4928 case SECCAT_RODATA_MERGE_CONST:
4929 prefix = one_only ? ".lr" : ".lrodata";
4931 case SECCAT_SRODATA:
4938 /* We don't split these for the medium model. Place them into
4939 default sections and hope for the best. */
4944 const char *name, *linkonce;
4947 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4948 name = targetm.strip_name_encoding (name);
4950 /* If we're using one_only, then there needs to be a .gnu.linkonce
4951 prefix to the section name. */
4952 linkonce = one_only ? ".gnu.linkonce" : "";
4954 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4956 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4960 default_unique_section (decl, reloc);
4963 #ifdef COMMON_ASM_OP
4964 /* This says how to output assembler code to declare an
4965 uninitialized external linkage data object.
4967 For medium model x86-64 we need to use the .largecomm directive for large objects. */
4970 x86_elf_aligned_common (FILE *file,
4971 const char *name, unsigned HOST_WIDE_INT size,
4974 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4975 && size > (unsigned int)ix86_section_threshold)
4976 fputs (".largecomm\t", file);
4978 fputs (COMMON_ASM_OP, file);
4979 assemble_name (file, name);
4980 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4981 size, align / BITS_PER_UNIT);
4985 /* Utility function for targets to use in implementing
4986 ASM_OUTPUT_ALIGNED_BSS. */
4989 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4990 const char *name, unsigned HOST_WIDE_INT size,
4993 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4994 && size > (unsigned int)ix86_section_threshold)
4995 switch_to_section (get_named_section (decl, ".lbss", 0));
4997 switch_to_section (bss_section);
4998 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4999 #ifdef ASM_DECLARE_OBJECT_NAME
5000 last_assemble_variable_decl = decl;
5001 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5003 /* The standard thing is just to output a label for the object. */
5004 ASM_OUTPUT_LABEL (file, name);
5005 #endif /* ASM_DECLARE_OBJECT_NAME */
5006 ASM_OUTPUT_SKIP (file, size ? size : 1);
5009 /* Decide whether we must probe the stack before any space allocation
5010 on this target. It's essentially TARGET_STACK_PROBE except when
5011 -fstack-check causes the stack to be already probed differently. */
5014 ix86_target_stack_probe (void)
5016 /* Do not probe the stack twice if static stack checking is enabled. */
5017 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5020 return TARGET_STACK_PROBE;
5023 /* Decide whether we can make a sibling call to a function. DECL is the
5024 declaration of the function being targeted by the call and EXP is the
5025 CALL_EXPR representing the call. */
5028 ix86_function_ok_for_sibcall (tree decl, tree exp)
5030 tree type, decl_or_type;
5033 /* If we are generating position-independent code, we cannot sibcall
5034 optimize any indirect call, or a direct call to a global function,
5035 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5039 && (!decl || !targetm.binds_local_p (decl)))
5042 /* If we need to align the outgoing stack, then sibcalling would
5043 unalign the stack, which may break the called function. */
5044 if (ix86_minimum_incoming_stack_boundary (true)
5045 < PREFERRED_STACK_BOUNDARY)
5050 decl_or_type = decl;
5051 type = TREE_TYPE (decl);
5055 /* We're looking at the CALL_EXPR, we need the type of the function. */
5056 type = CALL_EXPR_FN (exp); /* pointer expression */
5057 type = TREE_TYPE (type); /* pointer type */
5058 type = TREE_TYPE (type); /* function type */
5059 decl_or_type = type;
5062 /* Check that the return value locations are the same. For instance,
5063 if we are returning floats on the 80387 register stack, we cannot
5064 make a sibcall from a function that doesn't return a float to a
5065 function that does or, conversely, from a function that does return
5066 a float to a function that doesn't; the necessary stack adjustment
5067 would not be executed. This is also the place we notice
5068 differences in the return value ABI. Note that it is ok for one
5069 of the functions to have void return type as long as the return
5070 value of the other is passed in a register. */
5071 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5072 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5074 if (STACK_REG_P (a) || STACK_REG_P (b))
5076 if (!rtx_equal_p (a, b))
5079 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5081 /* Disable sibcall if we need to generate vzeroupper after the callee returns. */
5083 if (TARGET_VZEROUPPER
5084 && cfun->machine->callee_return_avx256_p
5085 && !cfun->machine->caller_return_avx256_p)
5088 else if (!rtx_equal_p (a, b))
5093 /* The SYSV ABI has more call-clobbered registers;
5094 disallow sibcalls from MS to SYSV. */
5095 if (cfun->machine->call_abi == MS_ABI
5096 && ix86_function_type_abi (type) == SYSV_ABI)
5101 /* If this call is indirect, we'll need to be able to use a
5102 call-clobbered register for the address of the target function.
5103 Make sure that all such registers are not used for passing
5104 parameters. Note that DLLIMPORT functions are indirect. */
5106 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5108 if (ix86_function_regparm (type, NULL) >= 3)
5110 /* ??? Need to count the actual number of registers to be used,
5111 not the possible number of registers. Fix later. */
5117 /* Otherwise okay. That also includes certain types of indirect calls. */
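/* For illustration of the 80387 return-value rule above
   (hypothetical functions):  */
#if 0
double g (double);

void
h (double x)
{
  g (x);  /* no sibcall: g returns on the x87 register stack and h
             returns nothing, so the fstp that pops the result would
             be bypassed by a jump-based tail call */
}
#endif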
5121 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5122 and "sseregparm" calling convention attributes;
5123 arguments as in struct attribute_spec.handler. */
5126 ix86_handle_cconv_attribute (tree *node, tree name,
5128 int flags ATTRIBUTE_UNUSED,
5131 if (TREE_CODE (*node) != FUNCTION_TYPE
5132 && TREE_CODE (*node) != METHOD_TYPE
5133 && TREE_CODE (*node) != FIELD_DECL
5134 && TREE_CODE (*node) != TYPE_DECL)
5136 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5138 *no_add_attrs = true;
5142 /* Can combine regparm with all attributes but fastcall and thiscall. */
5143 if (is_attribute_p ("regparm", name))
5147 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5149 error ("fastcall and regparm attributes are not compatible");
5152 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5154 error ("regparam and thiscall attributes are not compatible");
5157 cst = TREE_VALUE (args);
5158 if (TREE_CODE (cst) != INTEGER_CST)
5160 warning (OPT_Wattributes,
5161 "%qE attribute requires an integer constant argument",
5163 *no_add_attrs = true;
5165 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5167 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5169 *no_add_attrs = true;
5177 /* Do not warn when emulating the MS ABI. */
5178 if ((TREE_CODE (*node) != FUNCTION_TYPE
5179 && TREE_CODE (*node) != METHOD_TYPE)
5180 || ix86_function_type_abi (*node) != MS_ABI)
5181 warning (OPT_Wattributes, "%qE attribute ignored",
5183 *no_add_attrs = true;
5187 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5188 if (is_attribute_p ("fastcall", name))
5190 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5192 error ("fastcall and cdecl attributes are not compatible");
5194 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5196 error ("fastcall and stdcall attributes are not compatible");
5198 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5200 error ("fastcall and regparm attributes are not compatible");
5202 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5204 error ("fastcall and thiscall attributes are not compatible");
5208 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
5210 else if (is_attribute_p ("stdcall", name))
5212 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5214 error ("stdcall and cdecl attributes are not compatible");
5216 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5218 error ("stdcall and fastcall attributes are not compatible");
5220 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5222 error ("stdcall and thiscall attributes are not compatible");
5226 /* Can combine cdecl with regparm and sseregparm. */
5227 else if (is_attribute_p ("cdecl", name))
5229 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5231 error ("stdcall and cdecl attributes are not compatible");
5233 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5235 error ("fastcall and cdecl attributes are not compatible");
5237 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5239 error ("cdecl and thiscall attributes are not compatible");
5242 else if (is_attribute_p ("thiscall", name))
5244 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5245 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5247 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5249 error ("stdcall and thiscall attributes are not compatible");
5251 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5253 error ("fastcall and thiscall attributes are not compatible");
5255 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5257 error ("cdecl and thiscall attributes are not compatible");
5261 /* Can combine sseregparm with all attributes. */
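/* For illustration of the combination rules above (hypothetical
   32-bit declarations):  */
#if 0
/* Accepted: cdecl combines with regparm.  */
void f1 (int, int) __attribute__((cdecl, regparm (2)));
/* Rejected by the checks above: fastcall already uses registers for
   the first arguments, so regparm conflicts.  */
void f2 (int, int) __attribute__((fastcall, regparm (2)));
#endif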
5266 /* The transactional memory builtins are implicitly regparm or fastcall
5267 depending on the ABI. Override the generic do-nothing attribute that
5268 these builtins were declared with, and replace it with one of the two
5269 attributes that we expect elsewhere. */
5272 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5273 tree args ATTRIBUTE_UNUSED,
5274 int flags ATTRIBUTE_UNUSED,
5279 /* In no case do we want to add the placeholder attribute. */
5280 *no_add_attrs = true;
5282 /* The 64-bit ABI is unchanged for transactional memory. */
5286 /* ??? Is there a better way to validate 32-bit windows? We have
5287 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5288 if (CHECK_STACK_LIMIT > 0)
5289 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5292 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5293 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5295 decl_attributes (node, alt, flags);
5300 /* This function determines from TYPE the calling-convention. */
5303 ix86_get_callcvt (const_tree type)
5305 unsigned int ret = 0;
5310 return IX86_CALLCVT_CDECL;
5312 attrs = TYPE_ATTRIBUTES (type);
5313 if (attrs != NULL_TREE)
5315 if (lookup_attribute ("cdecl", attrs))
5316 ret |= IX86_CALLCVT_CDECL;
5317 else if (lookup_attribute ("stdcall", attrs))
5318 ret |= IX86_CALLCVT_STDCALL;
5319 else if (lookup_attribute ("fastcall", attrs))
5320 ret |= IX86_CALLCVT_FASTCALL;
5321 else if (lookup_attribute ("thiscall", attrs))
5322 ret |= IX86_CALLCVT_THISCALL;
5324 /* Regparm isn't allowed for thiscall and fastcall. */
5325 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5327 if (lookup_attribute ("regparm", attrs))
5328 ret |= IX86_CALLCVT_REGPARM;
5329 if (lookup_attribute ("sseregparm", attrs))
5330 ret |= IX86_CALLCVT_SSEREGPARM;
5333 if (IX86_BASE_CALLCVT(ret) != 0)
5337 is_stdarg = stdarg_p (type);
5338 if (TARGET_RTD && !is_stdarg)
5339 return IX86_CALLCVT_STDCALL | ret;
5343 || TREE_CODE (type) != METHOD_TYPE
5344 || ix86_function_type_abi (type) != MS_ABI)
5345 return IX86_CALLCVT_CDECL | ret;
5347 return IX86_CALLCVT_THISCALL;
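/* For illustration (hypothetical declarations): a variadic function
   never gets the callee-pop STDCALL convention, even under -mrtd,
   because the callee cannot know how many bytes to pop.  */
#if 0
int log_message (const char *fmt, ...);  /* stays CDECL under -mrtd */
int add_fixed (int a, int b);            /* STDCALL under -mrtd */
#endif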
5350 /* Return 0 if the attributes for two types are incompatible, 1 if they
5351 are compatible, and 2 if they are nearly compatible (which causes a
5352 warning to be generated). */
5355 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5357 unsigned int ccvt1, ccvt2;
5359 if (TREE_CODE (type1) != FUNCTION_TYPE
5360 && TREE_CODE (type1) != METHOD_TYPE)
5363 ccvt1 = ix86_get_callcvt (type1);
5364 ccvt2 = ix86_get_callcvt (type2);
5367 if (ix86_function_regparm (type1, NULL)
5368 != ix86_function_regparm (type2, NULL))
5374 /* Return the regparm value for a function with the indicated TYPE and DECL.
5375 DECL may be NULL when calling function indirectly
5376 or considering a libcall. */
5379 ix86_function_regparm (const_tree type, const_tree decl)
5386 return (ix86_function_type_abi (type) == SYSV_ABI
5387 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5388 ccvt = ix86_get_callcvt (type);
5389 regparm = ix86_regparm;
5391 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5393 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5396 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5400 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5402 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5405 /* Use register calling convention for local functions when possible. */
5407 && TREE_CODE (decl) == FUNCTION_DECL
5409 && !(profile_flag && !flag_fentry))
5411 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5412 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5413 if (i && i->local && i->can_change_signature)
5415 int local_regparm, globals = 0, regno;
5417 /* Make sure no regparm register is taken by a
5418 fixed register variable. */
5419 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5420 if (fixed_regs[local_regparm])
5423 /* We don't want to use regparm(3) for nested functions as
5424 these use a static chain pointer in the third argument. */
5425 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5428 /* In 32-bit mode save a register for the split stack. */
5429 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5432 /* Each fixed register usage increases register pressure,
5433 so fewer registers should be used for argument passing.
5434 This functionality can be overridden by an explicit
5436 for (regno = AX_REG; regno <= DI_REG; regno++)
5437 if (fixed_regs[regno])
5441 = globals < local_regparm ? local_regparm - globals : 0;
5443 if (local_regparm > regparm)
5444 regparm = local_regparm;
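/* Example of the effect (a sketch, not code from this file): with
   optimization enabled, a file-local function such as

     static int add3 (int a, int b, int c) { return a + b + c; }

   whose signature may be changed can be promoted to regparm here,
   so its arguments arrive in EAX, EDX and ECX instead of on the
   stack. */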
5451 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5452 DFmode (2) arguments in SSE registers for a function with the
5453 indicated TYPE and DECL. DECL may be NULL when calling the function
5454 indirectly or considering a libcall. Otherwise return 0. */
5457 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5459 gcc_assert (!TARGET_64BIT);
5461 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5462 by the sseregparm attribute. */
5463 if (TARGET_SSEREGPARM
5464 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5471 error ("calling %qD with attribute sseregparm without "
5472 "SSE/SSE2 enabled", decl);
5474 error ("calling %qT with attribute sseregparm without "
5475 "SSE/SSE2 enabled", type);
5483 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5484 (and DFmode for SSE2) arguments in SSE registers. */
5485 if (decl && TARGET_SSE_MATH && optimize
5486 && !(profile_flag && !flag_fentry))
5488 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5489 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5490 if (i && i->local && i->can_change_signature)
5491 return TARGET_SSE2 ? 2 : 1;
5497 /* Return true if EAX is live at the start of the function. Used by
5498 ix86_expand_prologue to determine if we need special help before
5499 calling allocate_stack_worker. */
5502 ix86_eax_live_at_start_p (void)
5504 /* Cheat. Don't bother working forward from ix86_function_regparm
5505 to the function type to whether an actual argument is located in
5506 eax. Instead just look at cfg info, which is still close enough
5507 to correct at this point. This gives false positives for broken
5508 functions that might use uninitialized data that happens to be
5509 allocated in eax, but who cares? */
5510 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5514 ix86_keep_aggregate_return_pointer (tree fntype)
5520 attr = lookup_attribute ("callee_pop_aggregate_return",
5521 TYPE_ATTRIBUTES (fntype));
5523 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5525 /* For 32-bit MS-ABI the default is to keep aggregate
5527 if (ix86_function_type_abi (fntype) == MS_ABI)
5530 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5533 /* Value is the number of bytes of arguments automatically
5534 popped when returning from a subroutine call.
5535 FUNDECL is the declaration node of the function (as a tree),
5536 FUNTYPE is the data type of the function (as a tree),
5537 or for a library call it is an identifier node for the subroutine name.
5538 SIZE is the number of bytes of arguments passed on the stack.
5540 On the 80386, the RTD insn may be used to pop them if the number
5541 of args is fixed, but if the number is variable then the caller
5542 must pop them all. RTD can't be used for library calls now
5543 because the library is compiled with the Unix compiler.
5544 Use of RTD is a selectable option, since it is incompatible with
5545 standard Unix calling sequences. If the option is not selected,
5546 the caller must always pop the args.
5548 The attribute stdcall is equivalent to RTD on a per module basis. */
5551 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5555 /* None of the 64-bit ABIs pop arguments. */
5559 ccvt = ix86_get_callcvt (funtype);
5561 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5562 | IX86_CALLCVT_THISCALL)) != 0
5563 && ! stdarg_p (funtype))
5566 /* Lose any fake structure return argument if it is passed on the stack. */
5567 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5568 && !ix86_keep_aggregate_return_pointer (funtype))
5570 int nregs = ix86_function_regparm (funtype, fundecl);
5572 return GET_MODE_SIZE (Pmode);
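/* Illustration (hypothetical declaration):

     void __attribute__ ((stdcall)) f (int a, int b);

   has 8 bytes of stack arguments popped by the callee ("ret 8"),
   so this hook returns 8; a cdecl or variadic function yields 0
   and the caller pops instead. */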
5578 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5581 ix86_legitimate_combined_insn (rtx insn)
5583 /* Check operand constraints in case hard registers were propagated
5584 into insn pattern. This check prevents combine pass from
5585 generating insn patterns with invalid hard register operands.
5586 These invalid insns can eventually confuse reload to error out
5587 with a spill failure. See also PRs 46829 and 46843. */
5588 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5592 extract_insn (insn);
5593 preprocess_constraints ();
5595 for (i = 0; i < recog_data.n_operands; i++)
5597 rtx op = recog_data.operand[i];
5598 enum machine_mode mode = GET_MODE (op);
5599 struct operand_alternative *op_alt;
5604 /* A unary operator may be accepted by the predicate, but it
5605 is irrelevant for matching constraints. */
5609 if (GET_CODE (op) == SUBREG)
5611 if (REG_P (SUBREG_REG (op))
5612 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5613 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5614 GET_MODE (SUBREG_REG (op)),
5617 op = SUBREG_REG (op);
5620 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5623 op_alt = recog_op_alt[i];
5625 /* Operand has no constraints, anything is OK. */
5626 win = !recog_data.n_alternatives;
5628 for (j = 0; j < recog_data.n_alternatives; j++)
5630 if (op_alt[j].anything_ok
5631 || (op_alt[j].matches != -1
5633 (recog_data.operand[i],
5634 recog_data.operand[op_alt[j].matches]))
5635 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5650 /* Argument support functions. */
5652 /* Return true when register REGNO may be used to pass function parameters. */
5654 ix86_function_arg_regno_p (int regno)
5657 const int *parm_regs;
5662 return (regno < REGPARM_MAX
5663 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5665 return (regno < REGPARM_MAX
5666 || (TARGET_MMX && MMX_REGNO_P (regno)
5667 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5668 || (TARGET_SSE && SSE_REGNO_P (regno)
5669 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5674 if (SSE_REGNO_P (regno) && TARGET_SSE)
5679 if (TARGET_SSE && SSE_REGNO_P (regno)
5680 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5684 /* TODO: The function should depend on current function ABI but
5685 builtins.c would need updating then. Therefore we use the
5688 /* RAX is used as hidden argument to va_arg functions. */
5689 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5692 if (ix86_abi == MS_ABI)
5693 parm_regs = x86_64_ms_abi_int_parameter_registers;
5695 parm_regs = x86_64_int_parameter_registers;
5696 for (i = 0; i < (ix86_abi == MS_ABI
5697 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5698 if (regno == parm_regs[i])
5703 /* Return true if we do not know how to pass TYPE solely in registers. */
5706 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5708 if (must_pass_in_stack_var_size_or_pad (mode, type))
5711 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5712 The layout_type routine is crafty and tries to trick us into passing
5713 currently unsupported vector types on the stack by using TImode. */
5714 return (!TARGET_64BIT && mode == TImode
5715 && type && TREE_CODE (type) != VECTOR_TYPE);
5718 /* Return the size, in bytes, of the area reserved for arguments passed
5719 in registers for the function represented by FNDECL, depending on the used
5722 ix86_reg_parm_stack_space (const_tree fndecl)
5724 enum calling_abi call_abi = SYSV_ABI;
5725 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5726 call_abi = ix86_function_abi (fndecl);
5728 call_abi = ix86_function_type_abi (fndecl);
5729 if (TARGET_64BIT && call_abi == MS_ABI)
5734 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5737 ix86_function_type_abi (const_tree fntype)
5739 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5741 enum calling_abi abi = ix86_abi;
5742 if (abi == SYSV_ABI)
5744 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5747 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5755 ix86_function_ms_hook_prologue (const_tree fn)
5757 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5759 if (decl_function_context (fn) != NULL_TREE)
5760 error_at (DECL_SOURCE_LOCATION (fn),
5761 "ms_hook_prologue is not compatible with nested function");
5768 static enum calling_abi
5769 ix86_function_abi (const_tree fndecl)
5773 return ix86_function_type_abi (TREE_TYPE (fndecl));
5776 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5779 ix86_cfun_abi (void)
5783 return cfun->machine->call_abi;
5786 /* Write the extra assembler code needed to declare a function properly. */
5789 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5792 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5796 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5797 unsigned int filler_cc = 0xcccccccc;
5799 for (i = 0; i < filler_count; i += 4)
5800 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5803 #ifdef SUBTARGET_ASM_UNWIND_INIT
5804 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5807 ASM_OUTPUT_LABEL (asm_out_file, fname);
5809 /* Output magic byte marker, if hot-patch attribute is set. */
5814 /* leaq [%rsp + 0], %rsp */
5815 asm_fprintf (asm_out_file, ASM_BYTE
5816 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5820 /* movl.s %edi, %edi
5822 movl.s %esp, %ebp */
5823 asm_fprintf (asm_out_file, ASM_BYTE
5824 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5830 extern void init_regs (void);
5832 /* Implementation of the call-ABI-switching target hook. The call
5833 register sets specific to FNDECL are selected here. See also
5834 ix86_conditional_register_usage for more details. */
5836 ix86_call_abi_override (const_tree fndecl)
5838 if (fndecl == NULL_TREE)
5839 cfun->machine->call_abi = ix86_abi;
5841 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5844 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5845 expensive re-initialization of init_regs each time we switch function context
5846 since this is needed only during RTL expansion. */
5848 ix86_maybe_switch_abi (void)
5851 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5855 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5856 for a call to a function whose data type is FNTYPE.
5857 For a library call, FNTYPE is 0. */
5860 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5861 tree fntype, /* tree ptr for function decl */
5862 rtx libname, /* SYMBOL_REF of library name or 0 */
5866 struct cgraph_local_info *i;
5869 memset (cum, 0, sizeof (*cum));
5871 /* Initialize for the current callee. */
5874 cfun->machine->callee_pass_avx256_p = false;
5875 cfun->machine->callee_return_avx256_p = false;
5880 i = cgraph_local_info (fndecl);
5881 cum->call_abi = ix86_function_abi (fndecl);
5882 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5887 cum->call_abi = ix86_function_type_abi (fntype);
5889 fnret_type = TREE_TYPE (fntype);
5894 if (TARGET_VZEROUPPER && fnret_type)
5896 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5898 if (function_pass_avx256_p (fnret_value))
5900 /* The return value of this function uses 256bit AVX modes. */
5902 cfun->machine->callee_return_avx256_p = true;
5904 cfun->machine->caller_return_avx256_p = true;
5908 cum->caller = caller;
5910 /* Set up the number of registers to use for passing arguments. */
5912 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5913 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5914 "or subtarget optimization implying it");
5915 cum->nregs = ix86_regparm;
5918 cum->nregs = (cum->call_abi == SYSV_ABI
5919 ? X86_64_REGPARM_MAX
5920 : X86_64_MS_REGPARM_MAX);
5924 cum->sse_nregs = SSE_REGPARM_MAX;
5927 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5928 ? X86_64_SSE_REGPARM_MAX
5929 : X86_64_MS_SSE_REGPARM_MAX);
5933 cum->mmx_nregs = MMX_REGPARM_MAX;
5934 cum->warn_avx = true;
5935 cum->warn_sse = true;
5936 cum->warn_mmx = true;
5938 /* Because the type might differ between caller and callee, we need to
5939 use the actual type of the function for local calls.
5940 FIXME: cgraph_analyze can be told to actually record if function uses
5941 va_start so for local functions maybe_vaarg can be made aggressive
5943 FIXME: once the type system is fixed, we won't need this code anymore. */
5944 if (i && i->local && i->can_change_signature)
5945 fntype = TREE_TYPE (fndecl);
5946 cum->maybe_vaarg = (fntype
5947 ? (!prototype_p (fntype) || stdarg_p (fntype))
5952 /* If there are variable arguments, then we won't pass anything
5953 in registers in 32-bit mode. */
5954 if (stdarg_p (fntype))
5965 /* Use ecx and edx registers if function has fastcall attribute,
5966 else look for regparm information. */
5969 unsigned int ccvt = ix86_get_callcvt (fntype);
5970 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5973 cum->fastcall = 1; /* Same first register as in fastcall. */
5975 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5981 cum->nregs = ix86_function_regparm (fntype, fndecl);
5984 /* Set up the number of SSE registers used for passing SFmode
5985 and DFmode arguments. Warn for mismatching ABI. */
5986 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5990 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5991 But in the case of vector types, it is some vector mode.
5993 When we have only some of our vector isa extensions enabled, then there
5994 are some modes for which vector_mode_supported_p is false. For these
5995 modes, the generic vector support in gcc will choose some non-vector mode
5996 in order to implement the type. By computing the natural mode, we'll
5997 select the proper ABI location for the operand and not depend on whatever
5998 the middle-end decides to do with these vector types.
6000 The middle-end can't deal with vector types larger than 16 bytes. In this
6001 case, we return the original mode and warn ABI change if CUM isn't
6004 static enum machine_mode
6005 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
6007 enum machine_mode mode = TYPE_MODE (type);
6009 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6011 HOST_WIDE_INT size = int_size_in_bytes (type);
6012 if ((size == 8 || size == 16 || size == 32)
6013 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6014 && TYPE_VECTOR_SUBPARTS (type) > 1)
6016 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6018 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6019 mode = MIN_MODE_VECTOR_FLOAT;
6021 mode = MIN_MODE_VECTOR_INT;
6023 /* Get the mode which has this inner mode and number of units. */
6024 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6025 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6026 && GET_MODE_INNER (mode) == innermode)
6028 if (size == 32 && !TARGET_AVX)
6030 static bool warnedavx;
6037 warning (0, "AVX vector argument without AVX "
6038 "enabled changes the ABI");
6040 return TYPE_MODE (type);
6042 else if ((size == 8 || size == 16) && !TARGET_SSE)
6044 static bool warnedsse;
6051 warning (0, "SSE vector argument without SSE "
6052 "enabled changes the ABI");
6067 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6068 this may not agree with the mode that the type system has chosen for the
6069 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6070 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6073 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6078 if (orig_mode != BLKmode)
6079 tmp = gen_rtx_REG (orig_mode, regno);
6082 tmp = gen_rtx_REG (mode, regno);
6083 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6084 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6090 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6091 of this code is to classify each 8bytes of incoming argument by the register
6092 class and assign registers accordingly. */
6094 /* Return the union class of CLASS1 and CLASS2.
6095 See the x86-64 PS ABI for details. */
6097 static enum x86_64_reg_class
6098 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6100 /* Rule #1: If both classes are equal, this is the resulting class. */
6101 if (class1 == class2)
6104 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6106 if (class1 == X86_64_NO_CLASS)
6108 if (class2 == X86_64_NO_CLASS)
6111 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6112 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6113 return X86_64_MEMORY_CLASS;
6115 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6116 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6117 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6118 return X86_64_INTEGERSI_CLASS;
6119 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6120 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6121 return X86_64_INTEGER_CLASS;
6123 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6125 if (class1 == X86_64_X87_CLASS
6126 || class1 == X86_64_X87UP_CLASS
6127 || class1 == X86_64_COMPLEX_X87_CLASS
6128 || class2 == X86_64_X87_CLASS
6129 || class2 == X86_64_X87UP_CLASS
6130 || class2 == X86_64_COMPLEX_X87_CLASS)
6131 return X86_64_MEMORY_CLASS;
6133 /* Rule #6: Otherwise class SSE is used. */
6134 return X86_64_SSE_CLASS;
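/* Worked example (illustrative): in the single eightbyte of

     struct { float f; int i; };

   the float half classifies as X86_64_SSESF_CLASS and the int half
   as X86_64_INTEGERSI_CLASS; rule #4 above merges them into
   X86_64_INTEGERSI_CLASS, so the whole struct is passed in one
   general-purpose register. */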
6137 /* Classify the argument of type TYPE and mode MODE.
6138 CLASSES will be filled by the register class used to pass each word
6139 of the operand. The number of words is returned. In case the parameter
6140 should be passed in memory, 0 is returned. As a special case for zero
6141 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6143 BIT_OFFSET is used internally for handling records and specifies the
6144 offset in bits, taken modulo 256 to avoid overflow cases.
6146 See the x86-64 PS ABI for details.
6150 classify_argument (enum machine_mode mode, const_tree type,
6151 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6153 HOST_WIDE_INT bytes =
6154 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6156 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6158 /* Variable sized entities are always passed/returned in memory. */
6162 if (mode != VOIDmode
6163 && targetm.calls.must_pass_in_stack (mode, type))
6166 if (type && AGGREGATE_TYPE_P (type))
6170 enum x86_64_reg_class subclasses[MAX_CLASSES];
6172 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6176 for (i = 0; i < words; i++)
6177 classes[i] = X86_64_NO_CLASS;
6179 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6180 signal the memory class, so handle this as a special case. */
6183 classes[0] = X86_64_NO_CLASS;
6187 /* Classify each field of record and merge classes. */
6188 switch (TREE_CODE (type))
6191 /* And now merge the fields of structure. */
6192 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6194 if (TREE_CODE (field) == FIELD_DECL)
6198 if (TREE_TYPE (field) == error_mark_node)
6201 /* Bitfields are always classified as integer. Handle them
6202 early, since later code would consider them to be
6203 misaligned integers. */
6204 if (DECL_BIT_FIELD (field))
6206 for (i = (int_bit_position (field)
6207 + (bit_offset % 64)) / 8 / 8;
6208 i < ((int_bit_position (field) + (bit_offset % 64))
6209 + tree_low_cst (DECL_SIZE (field), 0)
6212 merge_classes (X86_64_INTEGER_CLASS,
6219 type = TREE_TYPE (field);
6221 /* Flexible array member is ignored. */
6222 if (TYPE_MODE (type) == BLKmode
6223 && TREE_CODE (type) == ARRAY_TYPE
6224 && TYPE_SIZE (type) == NULL_TREE
6225 && TYPE_DOMAIN (type) != NULL_TREE
6226 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6231 if (!warned && warn_psabi)
6234 inform (input_location,
6235 "the ABI of passing struct with"
6236 " a flexible array member has"
6237 " changed in GCC 4.4");
6241 num = classify_argument (TYPE_MODE (type), type,
6243 (int_bit_position (field)
6244 + bit_offset) % 256);
6247 pos = (int_bit_position (field)
6248 + (bit_offset % 64)) / 8 / 8;
6249 for (i = 0; i < num && (i + pos) < words; i++)
6251 merge_classes (subclasses[i], classes[i + pos]);
6258 /* Arrays are handled as small records. */
6261 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6262 TREE_TYPE (type), subclasses, bit_offset);
6266 /* The partial classes are now full classes. */
6267 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6268 subclasses[0] = X86_64_SSE_CLASS;
6269 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6270 && !((bit_offset % 64) == 0 && bytes == 4))
6271 subclasses[0] = X86_64_INTEGER_CLASS;
6273 for (i = 0; i < words; i++)
6274 classes[i] = subclasses[i % num];
6279 case QUAL_UNION_TYPE:
6280 /* Unions are similar to RECORD_TYPE but offset is always 0.
6282 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6284 if (TREE_CODE (field) == FIELD_DECL)
6288 if (TREE_TYPE (field) == error_mark_node)
6291 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6292 TREE_TYPE (field), subclasses,
6296 for (i = 0; i < num; i++)
6297 classes[i] = merge_classes (subclasses[i], classes[i]);
6308 /* When size > 16 bytes, if the first one isn't
6309 X86_64_SSE_CLASS or any other ones aren't
6310 X86_64_SSEUP_CLASS, everything should be passed in
6312 if (classes[0] != X86_64_SSE_CLASS)
6315 for (i = 1; i < words; i++)
6316 if (classes[i] != X86_64_SSEUP_CLASS)
6320 /* Final merger cleanup. */
6321 for (i = 0; i < words; i++)
6323 /* If one class is MEMORY, everything should be passed in
6325 if (classes[i] == X86_64_MEMORY_CLASS)
6328 /* The X86_64_SSEUP_CLASS should be always preceded by
6329 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6330 if (classes[i] == X86_64_SSEUP_CLASS
6331 && classes[i - 1] != X86_64_SSE_CLASS
6332 && classes[i - 1] != X86_64_SSEUP_CLASS)
6334 /* The first one should never be X86_64_SSEUP_CLASS. */
6335 gcc_assert (i != 0);
6336 classes[i] = X86_64_SSE_CLASS;
6339 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6340 everything should be passed in memory. */
6341 if (classes[i] == X86_64_X87UP_CLASS
6342 && (classes[i - 1] != X86_64_X87_CLASS))
6346 /* The first one should never be X86_64_X87UP_CLASS. */
6347 gcc_assert (i != 0);
6348 if (!warned && warn_psabi)
6351 inform (input_location,
6352 "the ABI of passing union with long double"
6353 " has changed in GCC 4.4");
6361 /* Compute alignment needed. We align all types to natural boundaries with
6362 exception of XFmode that is aligned to 64bits. */
6363 if (mode != VOIDmode && mode != BLKmode)
6365 int mode_alignment = GET_MODE_BITSIZE (mode);
6368 mode_alignment = 128;
6369 else if (mode == XCmode)
6370 mode_alignment = 256;
6371 if (COMPLEX_MODE_P (mode))
6372 mode_alignment /= 2;
6373 /* Misaligned fields are always returned in memory. */
6374 if (bit_offset % mode_alignment)
6378 /* For V1xx modes, just use the base mode. */
6379 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6380 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6381 mode = GET_MODE_INNER (mode);
6383 /* Classification of atomic types. */
6388 classes[0] = X86_64_SSE_CLASS;
6391 classes[0] = X86_64_SSE_CLASS;
6392 classes[1] = X86_64_SSEUP_CLASS;
6402 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6406 classes[0] = X86_64_INTEGERSI_CLASS;
6409 else if (size <= 64)
6411 classes[0] = X86_64_INTEGER_CLASS;
6414 else if (size <= 64 + 32)
6416 classes[0] = X86_64_INTEGER_CLASS;
6417 classes[1] = X86_64_INTEGERSI_CLASS;
6420 else if (size <= 64 + 64)
6422 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6430 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6434 /* OImode shouldn't be used directly. */
6439 if (!(bit_offset % 64))
6440 classes[0] = X86_64_SSESF_CLASS;
6442 classes[0] = X86_64_SSE_CLASS;
6445 classes[0] = X86_64_SSEDF_CLASS;
6448 classes[0] = X86_64_X87_CLASS;
6449 classes[1] = X86_64_X87UP_CLASS;
6452 classes[0] = X86_64_SSE_CLASS;
6453 classes[1] = X86_64_SSEUP_CLASS;
6456 classes[0] = X86_64_SSE_CLASS;
6457 if (!(bit_offset % 64))
6463 if (!warned && warn_psabi)
6466 inform (input_location,
6467 "the ABI of passing structure with complex float"
6468 " member has changed in GCC 4.4");
6470 classes[1] = X86_64_SSESF_CLASS;
6474 classes[0] = X86_64_SSEDF_CLASS;
6475 classes[1] = X86_64_SSEDF_CLASS;
6478 classes[0] = X86_64_COMPLEX_X87_CLASS;
6481 /* This mode is larger than 16 bytes. */
6489 classes[0] = X86_64_SSE_CLASS;
6490 classes[1] = X86_64_SSEUP_CLASS;
6491 classes[2] = X86_64_SSEUP_CLASS;
6492 classes[3] = X86_64_SSEUP_CLASS;
6500 classes[0] = X86_64_SSE_CLASS;
6501 classes[1] = X86_64_SSEUP_CLASS;
6509 classes[0] = X86_64_SSE_CLASS;
6515 gcc_assert (VECTOR_MODE_P (mode));
6520 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6522 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6523 classes[0] = X86_64_INTEGERSI_CLASS;
6525 classes[0] = X86_64_INTEGER_CLASS;
6526 classes[1] = X86_64_INTEGER_CLASS;
6527 return 1 + (bytes > 8);
6531 /* Examine the argument and set the number of registers required in each
6532 class. Return 0 iff the parameter should be passed in memory. */
6534 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6535 int *int_nregs, int *sse_nregs)
6537 enum x86_64_reg_class regclass[MAX_CLASSES];
6538 int n = classify_argument (mode, type, regclass, 0);
6544 for (n--; n >= 0; n--)
6545 switch (regclass[n])
6547 case X86_64_INTEGER_CLASS:
6548 case X86_64_INTEGERSI_CLASS:
6551 case X86_64_SSE_CLASS:
6552 case X86_64_SSESF_CLASS:
6553 case X86_64_SSEDF_CLASS:
6556 case X86_64_NO_CLASS:
6557 case X86_64_SSEUP_CLASS:
6559 case X86_64_X87_CLASS:
6560 case X86_64_X87UP_CLASS:
6564 case X86_64_COMPLEX_X87_CLASS:
6565 return in_return ? 2 : 0;
6566 case X86_64_MEMORY_CLASS:
6572 /* Construct container for the argument used by GCC interface. See
6573 FUNCTION_ARG for the detailed description. */
6576 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6577 const_tree type, int in_return, int nintregs, int nsseregs,
6578 const int *intreg, int sse_regno)
6580 /* The following variables hold the static issued_error state. */
6581 static bool issued_sse_arg_error;
6582 static bool issued_sse_ret_error;
6583 static bool issued_x87_ret_error;
6585 enum machine_mode tmpmode;
6587 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6588 enum x86_64_reg_class regclass[MAX_CLASSES];
6592 int needed_sseregs, needed_intregs;
6593 rtx exp[MAX_CLASSES];
6596 n = classify_argument (mode, type, regclass, 0);
6599 if (!examine_argument (mode, type, in_return, &needed_intregs,
6602 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6605 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6606 some less clueful developer tries to use floating-point anyway. */
6607 if (needed_sseregs && !TARGET_SSE)
6611 if (!issued_sse_ret_error)
6613 error ("SSE register return with SSE disabled");
6614 issued_sse_ret_error = true;
6617 else if (!issued_sse_arg_error)
6619 error ("SSE register argument with SSE disabled");
6620 issued_sse_arg_error = true;
6625 /* Likewise, error if the ABI requires us to return values in the
6626 x87 registers and the user specified -mno-80387. */
6627 if (!TARGET_80387 && in_return)
6628 for (i = 0; i < n; i++)
6629 if (regclass[i] == X86_64_X87_CLASS
6630 || regclass[i] == X86_64_X87UP_CLASS
6631 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6633 if (!issued_x87_ret_error)
6635 error ("x87 register return with x87 disabled");
6636 issued_x87_ret_error = true;
6641 /* First construct simple cases. Avoid SCmode, since we want to use
6642 single register to pass this type. */
6643 if (n == 1 && mode != SCmode)
6644 switch (regclass[0])
6646 case X86_64_INTEGER_CLASS:
6647 case X86_64_INTEGERSI_CLASS:
6648 return gen_rtx_REG (mode, intreg[0]);
6649 case X86_64_SSE_CLASS:
6650 case X86_64_SSESF_CLASS:
6651 case X86_64_SSEDF_CLASS:
6652 if (mode != BLKmode)
6653 return gen_reg_or_parallel (mode, orig_mode,
6654 SSE_REGNO (sse_regno));
6656 case X86_64_X87_CLASS:
6657 case X86_64_COMPLEX_X87_CLASS:
6658 return gen_rtx_REG (mode, FIRST_STACK_REG);
6659 case X86_64_NO_CLASS:
6660 /* Zero sized array, struct or class. */
6666 && regclass[0] == X86_64_SSE_CLASS
6667 && regclass[1] == X86_64_SSEUP_CLASS
6669 return gen_reg_or_parallel (mode, orig_mode,
6670 SSE_REGNO (sse_regno));
6672 && regclass[0] == X86_64_SSE_CLASS
6673 && regclass[1] == X86_64_SSEUP_CLASS
6674 && regclass[2] == X86_64_SSEUP_CLASS
6675 && regclass[3] == X86_64_SSEUP_CLASS
6677 return gen_reg_or_parallel (mode, orig_mode,
6678 SSE_REGNO (sse_regno));
6680 && regclass[0] == X86_64_X87_CLASS
6681 && regclass[1] == X86_64_X87UP_CLASS)
6682 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6685 && regclass[0] == X86_64_INTEGER_CLASS
6686 && regclass[1] == X86_64_INTEGER_CLASS
6687 && (mode == CDImode || mode == TImode || mode == TFmode)
6688 && intreg[0] + 1 == intreg[1])
6689 return gen_rtx_REG (mode, intreg[0]);
6691 /* Otherwise figure out the entries of the PARALLEL. */
6692 for (i = 0; i < n; i++)
6696 switch (regclass[i])
6698 case X86_64_NO_CLASS:
6700 case X86_64_INTEGER_CLASS:
6701 case X86_64_INTEGERSI_CLASS:
6702 /* Merge TImodes on aligned occasions here too. */
6703 if (i * 8 + 8 > bytes)
6705 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6706 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6710 /* We've requested 24 bytes, which we
6711 don't have a mode for. Use DImode. */
6712 if (tmpmode == BLKmode)
6715 = gen_rtx_EXPR_LIST (VOIDmode,
6716 gen_rtx_REG (tmpmode, *intreg),
6720 case X86_64_SSESF_CLASS:
6722 = gen_rtx_EXPR_LIST (VOIDmode,
6723 gen_rtx_REG (SFmode,
6724 SSE_REGNO (sse_regno)),
6728 case X86_64_SSEDF_CLASS:
6730 = gen_rtx_EXPR_LIST (VOIDmode,
6731 gen_rtx_REG (DFmode,
6732 SSE_REGNO (sse_regno)),
6736 case X86_64_SSE_CLASS:
6744 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6754 && regclass[1] == X86_64_SSEUP_CLASS
6755 && regclass[2] == X86_64_SSEUP_CLASS
6756 && regclass[3] == X86_64_SSEUP_CLASS);
6764 = gen_rtx_EXPR_LIST (VOIDmode,
6765 gen_rtx_REG (tmpmode,
6766 SSE_REGNO (sse_regno)),
6775 /* Empty aligned struct, union or class. */
6779 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6780 for (i = 0; i < nexps; i++)
6781 XVECEXP (ret, 0, i) = exp [i];
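/* Example of a PARALLEL built here (illustrative): a SysV argument
   of type

     struct { long l; double d; };

   classifies as INTEGER + SSEDF, so the loop above produces two
   EXPR_LISTs: a DImode general register at byte offset 0 and a
   DFmode SSE register at byte offset 8. */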
6785 /* Update the data in CUM to advance over an argument of mode MODE
6786 and data type TYPE. (TYPE is null for libcalls where that information
6787 may not be available.) */
6790 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6791 const_tree type, HOST_WIDE_INT bytes,
6792 HOST_WIDE_INT words)
6808 cum->words += words;
6809 cum->nregs -= words;
6810 cum->regno += words;
6812 if (cum->nregs <= 0)
6820 /* OImode shouldn't be used directly. */
6824 if (cum->float_in_sse < 2)
6827 if (cum->float_in_sse < 1)
6844 if (!type || !AGGREGATE_TYPE_P (type))
6846 cum->sse_words += words;
6847 cum->sse_nregs -= 1;
6848 cum->sse_regno += 1;
6849 if (cum->sse_nregs <= 0)
6863 if (!type || !AGGREGATE_TYPE_P (type))
6865 cum->mmx_words += words;
6866 cum->mmx_nregs -= 1;
6867 cum->mmx_regno += 1;
6868 if (cum->mmx_nregs <= 0)
6879 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6880 const_tree type, HOST_WIDE_INT words, bool named)
6882 int int_nregs, sse_nregs;
6884 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6885 if (!named && VALID_AVX256_REG_MODE (mode))
6888 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6889 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6891 cum->nregs -= int_nregs;
6892 cum->sse_nregs -= sse_nregs;
6893 cum->regno += int_nregs;
6894 cum->sse_regno += sse_nregs;
6898 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6899 cum->words = (cum->words + align - 1) & ~(align - 1);
6900 cum->words += words;
6905 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6906 HOST_WIDE_INT words)
6908 /* Otherwise, this should be passed indirectly. */
6909 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6911 cum->words += words;
6919 /* Update the data in CUM to advance over an argument of mode MODE and
6920 data type TYPE. (TYPE is null for libcalls where that information
6921 may not be available.) */
6924 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6925 const_tree type, bool named)
6927 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6928 HOST_WIDE_INT bytes, words;
6930 if (mode == BLKmode)
6931 bytes = int_size_in_bytes (type);
6933 bytes = GET_MODE_SIZE (mode);
6934 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6937 mode = type_natural_mode (type, NULL);
6939 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6940 function_arg_advance_ms_64 (cum, bytes, words);
6941 else if (TARGET_64BIT)
6942 function_arg_advance_64 (cum, mode, type, words, named);
6944 function_arg_advance_32 (cum, mode, type, bytes, words);
6947 /* Define where to put the arguments to a function.
6948 Value is zero to push the argument on the stack,
6949 or a hard register in which to store the argument.
6951 MODE is the argument's machine mode.
6952 TYPE is the data type of the argument (as a tree).
6953 This is null for libcalls where that information may
6955 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6956 the preceding args and about the function being called.
6957 NAMED is nonzero if this argument is a named parameter
6958 (otherwise it is an extra parameter matching an ellipsis). */
6961 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6962 enum machine_mode orig_mode, const_tree type,
6963 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6965 static bool warnedsse, warnedmmx;
6967 /* Avoid the AL settings for the Unix64 ABI. */
6968 if (mode == VOIDmode)
6984 if (words <= cum->nregs)
6986 int regno = cum->regno;
6988 /* Fastcall allocates the first two DWORD (SImode) or
6989 smaller arguments to ECX and EDX if it isn't an
6995 || (type && AGGREGATE_TYPE_P (type)))
6998 /* ECX not EAX is the first allocated register. */
6999 if (regno == AX_REG)
7002 return gen_rtx_REG (mode, regno);
7007 if (cum->float_in_sse < 2)
7010 if (cum->float_in_sse < 1)
7014 /* In 32bit, we pass TImode in xmm registers. */
7021 if (!type || !AGGREGATE_TYPE_P (type))
7023 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
7026 warning (0, "SSE vector argument without SSE enabled "
7030 return gen_reg_or_parallel (mode, orig_mode,
7031 cum->sse_regno + FIRST_SSE_REG);
7036 /* OImode shouldn't be used directly. */
7045 if (!type || !AGGREGATE_TYPE_P (type))
7048 return gen_reg_or_parallel (mode, orig_mode,
7049 cum->sse_regno + FIRST_SSE_REG);
7059 if (!type || !AGGREGATE_TYPE_P (type))
7061 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
7064 warning (0, "MMX vector argument without MMX enabled "
7068 return gen_reg_or_parallel (mode, orig_mode,
7069 cum->mmx_regno + FIRST_MMX_REG);
7078 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7079 enum machine_mode orig_mode, const_tree type, bool named)
7081 /* Handle a hidden AL argument containing number of registers
7082 for varargs x86-64 functions. */
7083 if (mode == VOIDmode)
7084 return GEN_INT (cum->maybe_vaarg
7085 ? (cum->sse_nregs < 0
7086 ? X86_64_SSE_REGPARM_MAX
7101 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7107 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7109 &x86_64_int_parameter_registers [cum->regno],
7114 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7115 enum machine_mode orig_mode, bool named,
7116 HOST_WIDE_INT bytes)
7120 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
7121 We use the value -2 to specify that the current function call is MS ABI. */
7122 if (mode == VOIDmode)
7123 return GEN_INT (-2);
7125 /* If we've run out of registers, it goes on the stack. */
7126 if (cum->nregs == 0)
7129 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7131 /* Only floating point modes are passed in anything but integer regs. */
7132 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7135 regno = cum->regno + FIRST_SSE_REG;
7140 /* Unnamed floating parameters are passed in both the
7141 SSE and integer registers. */
7142 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7143 t2 = gen_rtx_REG (mode, regno);
7144 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7145 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7146 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7149 /* Handle aggregate types passed in registers. */
7150 if (orig_mode == BLKmode)
7152 if (bytes > 0 && bytes <= 8)
7153 mode = (bytes > 4 ? DImode : SImode);
7154 if (mode == BLKmode)
7158 return gen_reg_or_parallel (mode, orig_mode, regno);
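/* Illustration: an 8-byte BLKmode aggregate reaches the code above
   and is loaded as DImode into the slot's integer register, while
   aggregates whose size is not 1, 2, 4 or 8 bytes never get here,
   ix86_pass_by_reference having already forced them through a
   pointer. */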
7161 /* Return where to put the arguments to a function.
7162 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7164 MODE is the argument's machine mode. TYPE is the data type of the
7165 argument. It is null for libcalls where that information may not be
7166 available. CUM gives information about the preceding args and about
7167 the function being called. NAMED is nonzero if this argument is a
7168 named parameter (otherwise it is an extra parameter matching an
7172 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7173 const_tree type, bool named)
7175 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7176 enum machine_mode mode = omode;
7177 HOST_WIDE_INT bytes, words;
7180 if (mode == BLKmode)
7181 bytes = int_size_in_bytes (type);
7183 bytes = GET_MODE_SIZE (mode);
7184 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7186 /* To simplify the code below, represent vector types with a vector mode
7187 even if MMX/SSE are not active. */
7188 if (type && TREE_CODE (type) == VECTOR_TYPE)
7189 mode = type_natural_mode (type, cum);
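/* Illustration (an assumed user type, not defined in this file):
   for

     typedef int v4si __attribute__ ((vector_size (16)));

   TYPE_MODE may be a non-vector mode when SSE is disabled, but
   type_natural_mode returns V4SImode, so the ABI slot is chosen
   consistently (warning about the ABI change when the ISA is
   missing). */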
7191 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7192 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7193 else if (TARGET_64BIT)
7194 arg = function_arg_64 (cum, mode, omode, type, named);
7196 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7198 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7200 /* This argument uses 256bit AVX modes. */
7202 cfun->machine->callee_pass_avx256_p = true;
7204 cfun->machine->caller_pass_avx256_p = true;
7210 /* A C expression that indicates when an argument must be passed by
7211 reference. If nonzero for an argument, a copy of that argument is
7212 made in memory and a pointer to the argument is passed instead of
7213 the argument itself. The pointer is passed in whatever way is
7214 appropriate for passing a pointer to that type. */
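/* Illustration of the rule implemented below (hypothetical caller):
   under the Windows x64 convention a 16-byte __m128 argument fails
   the 1/2/4/8-byte test, so a copy is made in memory and a pointer
   to it is passed; the SysV ABI would pass the same value in an
   XMM register. */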
7217 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
7218 enum machine_mode mode ATTRIBUTE_UNUSED,
7219 const_tree type, bool named ATTRIBUTE_UNUSED)
7221 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7223 /* See Windows x64 Software Convention. */
7224 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7226 int msize = (int) GET_MODE_SIZE (mode);
7229 /* Arrays are passed by reference. */
7230 if (TREE_CODE (type) == ARRAY_TYPE)
7233 if (AGGREGATE_TYPE_P (type))
7235 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7236 are passed by reference. */
7237 msize = int_size_in_bytes (type);
7241 /* __m128 is passed by reference. */
7243 case 1: case 2: case 4: case 8:
7249 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7255 /* Return true when TYPE should be 128bit aligned for 32bit argument
7256 passing ABI. XXX: This function is obsolete and is only used for
7257 checking psABI compatibility with previous versions of GCC. */
7260 ix86_compat_aligned_value_p (const_tree type)
7262 enum machine_mode mode = TYPE_MODE (type);
7263 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7267 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7269 if (TYPE_ALIGN (type) < 128)
7272 if (AGGREGATE_TYPE_P (type))
7274 /* Walk the aggregates recursively. */
7275 switch (TREE_CODE (type))
7279 case QUAL_UNION_TYPE:
7283 /* Walk all the structure fields. */
7284 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7286 if (TREE_CODE (field) == FIELD_DECL
7287 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7294 /* Just in case some languages pass arrays by value. */
7295 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7306 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7307 XXX: This function is obsolete and is only used for checking psABI
7308 compatibility with previous versions of GCC. */
7311 ix86_compat_function_arg_boundary (enum machine_mode mode,
7312 const_tree type, unsigned int align)
7314 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7315 natural boundaries. */
7316 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7318 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7319 make an exception for SSE modes since these require 128bit
7322 The handling here differs from field_alignment. ICC aligns MMX
7323 arguments to 4 byte boundaries, while structure fields are aligned
7324 to 8 byte boundaries. */
7327 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7328 align = PARM_BOUNDARY;
7332 if (!ix86_compat_aligned_value_p (type))
7333 align = PARM_BOUNDARY;
7336 if (align > BIGGEST_ALIGNMENT)
7337 align = BIGGEST_ALIGNMENT;
7341 /* Return true when TYPE should be 128bit aligned for 32bit argument
7345 ix86_contains_aligned_value_p (const_tree type)
7347 enum machine_mode mode = TYPE_MODE (type);
7349 if (mode == XFmode || mode == XCmode)
7352 if (TYPE_ALIGN (type) < 128)
7355 if (AGGREGATE_TYPE_P (type))
7357 /* Walk the aggregates recursively. */
7358 switch (TREE_CODE (type))
7362 case QUAL_UNION_TYPE:
7366 /* Walk all the structure fields. */
7367 for (field = TYPE_FIELDS (type);
7369 field = DECL_CHAIN (field))
7371 if (TREE_CODE (field) == FIELD_DECL
7372 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7379 /* Just in case some languages pass arrays by value. */
7380 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7389 return TYPE_ALIGN (type) >= 128;
7394 /* Gives the alignment boundary, in bits, of an argument with the
7395 specified mode and type. */
7398 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7403 /* Since the main variant type is used for the call, convert TYPE
7404 to its main variant. */
7405 type = TYPE_MAIN_VARIANT (type);
7406 align = TYPE_ALIGN (type);
7409 align = GET_MODE_ALIGNMENT (mode);
7410 if (align < PARM_BOUNDARY)
7411 align = PARM_BOUNDARY;
7415 unsigned int saved_align = align;
7419 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7422 if (mode == XFmode || mode == XCmode)
7423 align = PARM_BOUNDARY;
7425 else if (!ix86_contains_aligned_value_p (type))
7426 align = PARM_BOUNDARY;
7429 align = PARM_BOUNDARY;
7434 && align != ix86_compat_function_arg_boundary (mode, type,
7438 inform (input_location,
7439 "The ABI for passing parameters with %d-byte"
7440 " alignment has changed in GCC 4.6",
7441 align / BITS_PER_UNIT);
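/* Illustration: a 16-byte vector argument such as __m128 keeps its
   128-bit alignment here, while a plain int is aligned to
   PARM_BOUNDARY; whenever the result differs from the pre-4.6 rule
   computed by ix86_compat_function_arg_boundary, the note above is
   emitted. */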
7448 /* Return true if N is a possible register number of function value. */
7451 ix86_function_value_regno_p (const unsigned int regno)
7458 case FIRST_FLOAT_REG:
7459 /* TODO: The function should depend on current function ABI but
7460 builtins.c would need updating then. Therefore we use the
7462 if (TARGET_64BIT && ix86_abi == MS_ABI)
7464 return TARGET_FLOAT_RETURNS_IN_80387;
7470 if (TARGET_MACHO || TARGET_64BIT)
7478 /* Define how to find the value returned by a function.
7479 VALTYPE is the data type of the value (as a tree).
7480 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7481 otherwise, FUNC is 0. */
7484 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7485 const_tree fntype, const_tree fn)
7489 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7490 we normally prevent this case when mmx is not available. However
7491 some ABIs may require the result to be returned like DImode. */
7492 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7493 regno = FIRST_MMX_REG;
7495 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7496 we prevent this case when sse is not available. However some ABIs
7497 may require the result to be returned like integer TImode. */
7498 else if (mode == TImode
7499 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7500 regno = FIRST_SSE_REG;
7502 /* 32-byte vector modes in %ymm0. */
7503 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7504 regno = FIRST_SSE_REG;
7506 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7507 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7508 regno = FIRST_FLOAT_REG;
7510 /* Most things go in %eax. */
7513 /* Override FP return register with %xmm0 for local functions when
7514 SSE math is enabled or for functions with sseregparm attribute. */
7515 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7517 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7518 if ((sse_level >= 1 && mode == SFmode)
7519 || (sse_level == 2 && mode == DFmode))
7520 regno = FIRST_SSE_REG;
7523 /* OImode shouldn't be used directly. */
7524 gcc_assert (mode != OImode);
7526 return gen_rtx_REG (orig_mode, regno);
7530 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7535 /* Handle libcalls, which don't provide a type node. */
7536 if (valtype == NULL)
7550 regno = FIRST_SSE_REG;
7554 regno = FIRST_FLOAT_REG;
7562 return gen_rtx_REG (mode, regno);
7564 else if (POINTER_TYPE_P (valtype))
7566 /* Pointers are always returned in word_mode. */
7570 ret = construct_container (mode, orig_mode, valtype, 1,
7571 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7572 x86_64_int_return_registers, 0);
7574 /* For zero-sized structures, construct_container returns NULL, but we
7575 need to keep the rest of the compiler happy by returning a meaningful value. */
7577 ret = gen_rtx_REG (orig_mode, AX_REG);
7583 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7585 unsigned int regno = AX_REG;
7589 switch (GET_MODE_SIZE (mode))
7592 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7593 && !COMPLEX_MODE_P (mode))
7594 regno = FIRST_SSE_REG;
7598 if (mode == SFmode || mode == DFmode)
7599 regno = FIRST_SSE_REG;
7605 return gen_rtx_REG (orig_mode, regno);
7609 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7610 enum machine_mode orig_mode, enum machine_mode mode)
7612 const_tree fn, fntype;
7615 if (fntype_or_decl && DECL_P (fntype_or_decl))
7616 fn = fntype_or_decl;
7617 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7619 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7620 return function_value_ms_64 (orig_mode, mode);
7621 else if (TARGET_64BIT)
7622 return function_value_64 (orig_mode, mode, valtype);
7624 return function_value_32 (orig_mode, mode, fntype, fn);
7628 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7629 bool outgoing ATTRIBUTE_UNUSED)
7631 enum machine_mode mode, orig_mode;
7633 orig_mode = TYPE_MODE (valtype);
7634 mode = type_natural_mode (valtype, NULL);
7635 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7638 /* Pointer function arguments and return values are promoted to
7641 static enum machine_mode
7642 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7643 int *punsignedp, const_tree fntype,
7646 if (type != NULL_TREE && POINTER_TYPE_P (type))
7648 *punsignedp = POINTERS_EXTEND_UNSIGNED;
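/* Illustration: a `char *' argument or return value is thus
   promoted to word_mode (DImode on 64-bit targets) with an
   unsigned extension, which matters on x32 where ptr_mode is only
   SImode. */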
7651 return default_promote_function_mode (type, mode, punsignedp, fntype,
7655 /* Return true if a structure, union or array with MODE containing FIELD
7656 should be accessed using BLKmode. */
7659 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7661 /* Union with XFmode must be in BLKmode. */
7662 return (mode == XFmode
7663 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7664 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7668 ix86_libcall_value (enum machine_mode mode)
7670 return ix86_function_value_1 (NULL, NULL, mode, mode);
7673 /* Return true iff type is returned in memory. */
7675 static bool ATTRIBUTE_UNUSED
7676 return_in_memory_32 (const_tree type, enum machine_mode mode)
7680 if (mode == BLKmode)
7683 size = int_size_in_bytes (type);
7685 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7688 if (VECTOR_MODE_P (mode) || mode == TImode)
7690 /* User-created vectors small enough to fit in EAX. */
7694 /* MMX/3dNow values are returned in MM0,
7695 except when it doesn't exist or the ABI prescribes otherwise. */
7697 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7699 /* SSE values are returned in XMM0, except when it doesn't exist. */
7703 /* AVX values are returned in YMM0, except when it doesn't exist. */
7714 /* OImode shouldn't be used directly. */
7715 gcc_assert (mode != OImode);
7720 static bool ATTRIBUTE_UNUSED
7721 return_in_memory_64 (const_tree type, enum machine_mode mode)
7723 int needed_intregs, needed_sseregs;
7724 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7727 static bool ATTRIBUTE_UNUSED
7728 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7730 HOST_WIDE_INT size = int_size_in_bytes (type);
7732 /* __m128 is returned in xmm0. */
7733 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7734 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7737 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7738 return size != 1 && size != 2 && size != 4 && size != 8;
7742 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7744 #ifdef SUBTARGET_RETURN_IN_MEMORY
7745 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7747 const enum machine_mode mode = type_natural_mode (type, NULL);
7751 if (ix86_function_type_abi (fntype) == MS_ABI)
7752 return return_in_memory_ms_64 (type, mode);
7754 return return_in_memory_64 (type, mode);
7757 return return_in_memory_32 (type, mode);
7761 /* When returning SSE vector types, we have a choice of either
7762 (1) being abi incompatible with a -march switch, or
7763 (2) generating an error.
7764 Given no good solution, I think the safest thing is one warning.
7765 The user won't be able to use -Werror, but....
7767 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7768 called in response to actually generating a caller or callee that
7769 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7770 via aggregate_value_p for general type probing from tree-ssa. */
7773 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7775 static bool warnedsse, warnedmmx;
7777 if (!TARGET_64BIT && type)
7779 /* Look at the return type of the function, not the function type. */
7780 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7782 if (!TARGET_SSE && !warnedsse)
7785 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7788 warning (0, "SSE vector return without SSE enabled "
7793 if (!TARGET_MMX && !warnedmmx)
7795 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7798 warning (0, "MMX vector return without MMX enabled "
7808 /* Create the va_list data type. */
7810 /* Return the calling-convention-specific va_list data type.
7811 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7814 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7816 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7818 /* For i386 we use plain pointer to argument area. */
7819 if (!TARGET_64BIT || abi == MS_ABI)
7820 return build_pointer_type (char_type_node);
7822 record = lang_hooks.types.make_type (RECORD_TYPE);
7823 type_decl = build_decl (BUILTINS_LOCATION,
7824 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7826 f_gpr = build_decl (BUILTINS_LOCATION,
7827 FIELD_DECL, get_identifier ("gp_offset"),
7828 unsigned_type_node);
7829 f_fpr = build_decl (BUILTINS_LOCATION,
7830 FIELD_DECL, get_identifier ("fp_offset"),
7831 unsigned_type_node);
7832 f_ovf = build_decl (BUILTINS_LOCATION,
7833 FIELD_DECL, get_identifier ("overflow_arg_area"),
7835 f_sav = build_decl (BUILTINS_LOCATION,
7836 FIELD_DECL, get_identifier ("reg_save_area"),
7839 va_list_gpr_counter_field = f_gpr;
7840 va_list_fpr_counter_field = f_fpr;
7842 DECL_FIELD_CONTEXT (f_gpr) = record;
7843 DECL_FIELD_CONTEXT (f_fpr) = record;
7844 DECL_FIELD_CONTEXT (f_ovf) = record;
7845 DECL_FIELD_CONTEXT (f_sav) = record;
7847 TYPE_STUB_DECL (record) = type_decl;
7848 TYPE_NAME (record) = type_decl;
7849 TYPE_FIELDS (record) = f_gpr;
7850 DECL_CHAIN (f_gpr) = f_fpr;
7851 DECL_CHAIN (f_fpr) = f_ovf;
7852 DECL_CHAIN (f_ovf) = f_sav;
7854 layout_type (record);
7856 /* The correct type is an array type of one element. */
7857 return build_array_type (record, build_index_type (size_zero_node));
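/* The record built above matches the well-known SysV x86-64 layout,
   shown as C for illustration only:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];
*/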
7860 /* Setup the builtin va_list data type and for 64-bit the additional
7861 calling convention specific va_list data types. */
7864 ix86_build_builtin_va_list (void)
7866 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7868 /* Initialize ABI-specific va_list builtin types. */
7872 if (ix86_abi == MS_ABI)
7874 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7875 if (TREE_CODE (t) != RECORD_TYPE)
7876 t = build_variant_type_copy (t);
7877 sysv_va_list_type_node = t;
7882 if (TREE_CODE (t) != RECORD_TYPE)
7883 t = build_variant_type_copy (t);
7884 sysv_va_list_type_node = t;
7886 if (ix86_abi != MS_ABI)
7888 t = ix86_build_builtin_va_list_abi (MS_ABI);
7889 if (TREE_CODE (t) != RECORD_TYPE)
7890 t = build_variant_type_copy (t);
7891 ms_va_list_type_node = t;
7896 if (TREE_CODE (t) != RECORD_TYPE)
7897 t = build_variant_type_copy (t);
7898 ms_va_list_type_node = t;
7905 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7908 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7914 /* GPR size of varargs save area. */
7915 if (cfun->va_list_gpr_size)
7916 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7918 ix86_varargs_gpr_size = 0;
7920 /* FPR size of varargs save area. We don't need it if we don't pass
7921 anything in SSE registers. */
7922 if (TARGET_SSE && cfun->va_list_fpr_size)
7923 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7925 ix86_varargs_fpr_size = 0;
7927 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7930 save_area = frame_pointer_rtx;
7931 set = get_varargs_alias_set ();
7933 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7934 if (max > X86_64_REGPARM_MAX)
7935 max = X86_64_REGPARM_MAX;
7937 for (i = cum->regno; i < max; i++)
7939 mem = gen_rtx_MEM (word_mode,
7940 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
7941 MEM_NOTRAP_P (mem) = 1;
7942 set_mem_alias_set (mem, set);
7943 emit_move_insn (mem,
7944 gen_rtx_REG (word_mode,
7945 x86_64_int_parameter_registers[i]));
7948 if (ix86_varargs_fpr_size)
7950 enum machine_mode smode;
7953 /* Now emit code to save SSE registers. The AX parameter contains number
7954 of SSE parameter registers used to call this function, though all we
7955 actually check here is the zero/non-zero status. */
7957 label = gen_label_rtx ();
7958 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7959 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7962 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7963 we used movdqa (i.e. TImode) instead? Perhaps even better would
7964 be if we could determine the real mode of the data, via a hook
7965 into pass_stdarg. Ignore all that for now. */
7967 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7968 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7970 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7971 if (max > X86_64_SSE_REGPARM_MAX)
7972 max = X86_64_SSE_REGPARM_MAX;
7974 for (i = cum->sse_regno; i < max; ++i)
7976 mem = plus_constant (Pmode, save_area,
7977 i * 16 + ix86_varargs_gpr_size);
7978 mem = gen_rtx_MEM (smode, mem);
7979 MEM_NOTRAP_P (mem) = 1;
7980 set_mem_alias_set (mem, set);
7981 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7983 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
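/* Example of the protocol being implemented (illustrative): for a
   call like printf ("%f", 1.0) the caller loads AL with the number
   of vector registers used (here 1), so the branch above skips
   this SSE spill loop only when AL is zero. */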
7991 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7993 alias_set_type set = get_varargs_alias_set ();
7996 /* Reset to zero, as there might be a sysv va_arg used
7997    before.  */
7998 ix86_varargs_gpr_size = 0;
7999 ix86_varargs_fpr_size = 0;
8001 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8005 mem = gen_rtx_MEM (Pmode,
8006 plus_constant (Pmode, virtual_incoming_args_rtx,
8007 i * UNITS_PER_WORD));
8008 MEM_NOTRAP_P (mem) = 1;
8009 set_mem_alias_set (mem, set);
8011 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8012 emit_move_insn (mem, reg);
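/* Note for the MS ABI path above: X86_64_MS_REGPARM_MAX is 4, and the
   slots written here are the caller-allocated 32-byte "home" (shadow)
   area at virtual_incoming_args_rtx, so rcx, rdx, r8 and r9 are simply
   spilled back into their home slots instead of a separate save area.  */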
8017 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8018 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8021 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8022 CUMULATIVE_ARGS next_cum;
8025 /* This argument doesn't appear to be used anymore, which is good,
8026    because the old code here didn't suppress rtl generation.  */
8027 gcc_assert (!no_rtl);
8032 fntype = TREE_TYPE (current_function_decl);
8034 /* For varargs, we do not want to skip the dummy va_dcl argument.
8035 For stdargs, we do want to skip the last named argument. */
8037 if (stdarg_p (fntype))
8038 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8041 if (cum->call_abi == MS_ABI)
8042 setup_incoming_varargs_ms_64 (&next_cum);
8044 setup_incoming_varargs_64 (&next_cum);
8047 /* Check whether TYPE is a va_list of kind char *.  */
8050 is_va_list_char_pointer (tree type)
8054 /* For 32-bit it is always true. */
8057 canonic = ix86_canonical_va_list_type (type);
8058 return (canonic == ms_va_list_type_node
8059 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8062 /* Implement va_start. */
8065 ix86_va_start (tree valist, rtx nextarg)
8067 HOST_WIDE_INT words, n_gpr, n_fpr;
8068 tree f_gpr, f_fpr, f_ovf, f_sav;
8069 tree gpr, fpr, ovf, sav, t;
8073 if (flag_split_stack
8074 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8076 unsigned int scratch_regno;
8078 /* When we are splitting the stack, we can't refer to the stack
8079 arguments using internal_arg_pointer, because they may be on
8080 the old stack. The split stack prologue will arrange to
8081 leave a pointer to the old stack arguments in a scratch
8082 register, which we here copy to a pseudo-register. The split
8083 stack prologue can't set the pseudo-register directly because
8084 it (the prologue) runs before any registers have been saved. */
8086 scratch_regno = split_stack_prologue_scratch_regno ();
8087 if (scratch_regno != INVALID_REGNUM)
8091 reg = gen_reg_rtx (Pmode);
8092 cfun->machine->split_stack_varargs_pointer = reg;
8095 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8099 push_topmost_sequence ();
8100 emit_insn_after (seq, entry_of_function ());
8101 pop_topmost_sequence ();
8105 /* Only the 64-bit target needs something special.  */
8106 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8108 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8109 std_expand_builtin_va_start (valist, nextarg);
8114 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8115 next = expand_binop (ptr_mode, add_optab,
8116 cfun->machine->split_stack_varargs_pointer,
8117 crtl->args.arg_offset_rtx,
8118 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8119 convert_move (va_r, next, 0);
8124 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8125 f_fpr = DECL_CHAIN (f_gpr);
8126 f_ovf = DECL_CHAIN (f_fpr);
8127 f_sav = DECL_CHAIN (f_ovf);
8129 valist = build_simple_mem_ref (valist);
8130 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8131 /* The following should be folded into the MEM_REF offset. */
8132 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8134 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8136 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8138 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8141 /* Count number of gp and fp argument registers used. */
8142 words = crtl->args.info.words;
8143 n_gpr = crtl->args.info.regno;
8144 n_fpr = crtl->args.info.sse_regno;
8146 if (cfun->va_list_gpr_size)
8148 type = TREE_TYPE (gpr);
8149 t = build2 (MODIFY_EXPR, type,
8150 gpr, build_int_cst (type, n_gpr * 8));
8151 TREE_SIDE_EFFECTS (t) = 1;
8152 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8155 if (TARGET_SSE && cfun->va_list_fpr_size)
8157 type = TREE_TYPE (fpr);
8158 t = build2 (MODIFY_EXPR, type, fpr,
8159 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8160 TREE_SIDE_EFFECTS (t) = 1;
8161 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8164 /* Find the overflow area. */
8165 type = TREE_TYPE (ovf);
8166 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8167 ovf_rtx = crtl->args.internal_arg_pointer;
8169 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8170 t = make_tree (type, ovf_rtx);
8172 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8173 t = build2 (MODIFY_EXPR, type, ovf, t);
8174 TREE_SIDE_EFFECTS (t) = 1;
8175 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8177 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8179 /* Find the register save area.
8180    The function prologue saves it right above the stack frame.  */
8181 type = TREE_TYPE (sav);
8182 t = make_tree (type, frame_pointer_rtx);
8183 if (!ix86_varargs_gpr_size)
8184 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8185 t = build2 (MODIFY_EXPR, type, sav, t);
8186 TREE_SIDE_EFFECTS (t) = 1;
8187 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
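/* A concrete example of what the va_start expansion above records, for
   "void f (const char *fmt, ...)" where FMT consumed one GP register
   (n_gpr == 1, n_fpr == 0, words == 0):

     gp_offset         = 1 * 8 = 8
     fp_offset         = 0 * 16 + 8 * 6 = 48
     overflow_arg_area = incoming argument pointer + 0
     reg_save_area     = the save area emitted by the prologue

   so the first va_arg of integer type reads reg_save_area + 8 (rsi).  */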
8191 /* Implement va_arg. */
8194 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8197 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8198 tree f_gpr, f_fpr, f_ovf, f_sav;
8199 tree gpr, fpr, ovf, sav, t;
8201 tree lab_false, lab_over = NULL_TREE;
8206 enum machine_mode nat_mode;
8207 unsigned int arg_boundary;
8209 /* Only the 64-bit target needs something special.  */
8210 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8211 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8213 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8214 f_fpr = DECL_CHAIN (f_gpr);
8215 f_ovf = DECL_CHAIN (f_fpr);
8216 f_sav = DECL_CHAIN (f_ovf);
8218 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8219 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8220 valist = build_va_arg_indirect_ref (valist);
8221 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8222 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8223 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8225 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8227 type = build_pointer_type (type);
8228 size = int_size_in_bytes (type);
8229 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8231 nat_mode = type_natural_mode (type, NULL);
8240 /* Unnamed 256bit vector mode parameters are passed on the stack.  */
8241 if (!TARGET_64BIT_MS_ABI)
8248 container = construct_container (nat_mode, TYPE_MODE (type),
8249 type, 0, X86_64_REGPARM_MAX,
8250 X86_64_SSE_REGPARM_MAX, intreg,
8255 /* Pull the value out of the saved registers. */
8257 addr = create_tmp_var (ptr_type_node, "addr");
8261 int needed_intregs, needed_sseregs;
8263 tree int_addr, sse_addr;
8265 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8266 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8268 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8270 need_temp = (!REG_P (container)
8271 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8272 || TYPE_ALIGN (type) > 128));
8274 /* In case we are passing a structure, verify that it is a consecutive
8275    block in the register save area.  If not, we need to do moves.  */
8276 if (!need_temp && !REG_P (container))
8278 /* Verify that all registers are strictly consecutive.  */
8279 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8283 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8285 rtx slot = XVECEXP (container, 0, i);
8286 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8287 || INTVAL (XEXP (slot, 1)) != i * 16)
8295 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8297 rtx slot = XVECEXP (container, 0, i);
8298 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8299 || INTVAL (XEXP (slot, 1)) != i * 8)
8311 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8312 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8315 /* First ensure that we fit completely in registers. */
8318 t = build_int_cst (TREE_TYPE (gpr),
8319 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8320 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8321 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8322 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8323 gimplify_and_add (t, pre_p);
8327 t = build_int_cst (TREE_TYPE (fpr),
8328 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8329 + X86_64_REGPARM_MAX * 8);
8330 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8331 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8332 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8333 gimplify_and_add (t, pre_p);
8336 /* Compute index to start of area used for integer regs. */
8339 /* int_addr = gpr + sav; */
8340 t = fold_build_pointer_plus (sav, gpr);
8341 gimplify_assign (int_addr, t, pre_p);
8345 /* sse_addr = fpr + sav; */
8346 t = fold_build_pointer_plus (sav, fpr);
8347 gimplify_assign (sse_addr, t, pre_p);
8351 int i, prev_size = 0;
8352 tree temp = create_tmp_var (type, "va_arg_tmp");
8355 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8356 gimplify_assign (addr, t, pre_p);
8358 for (i = 0; i < XVECLEN (container, 0); i++)
8360 rtx slot = XVECEXP (container, 0, i);
8361 rtx reg = XEXP (slot, 0);
8362 enum machine_mode mode = GET_MODE (reg);
8368 tree dest_addr, dest;
8369 int cur_size = GET_MODE_SIZE (mode);
8371 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8372 prev_size = INTVAL (XEXP (slot, 1));
8373 if (prev_size + cur_size > size)
8375 cur_size = size - prev_size;
8376 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8377 if (mode == BLKmode)
8380 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8381 if (mode == GET_MODE (reg))
8382 addr_type = build_pointer_type (piece_type);
8384 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8386 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8389 if (SSE_REGNO_P (REGNO (reg)))
8391 src_addr = sse_addr;
8392 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8396 src_addr = int_addr;
8397 src_offset = REGNO (reg) * 8;
8399 src_addr = fold_convert (addr_type, src_addr);
8400 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8402 dest_addr = fold_convert (daddr_type, addr);
8403 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8404 if (cur_size == GET_MODE_SIZE (mode))
8406 src = build_va_arg_indirect_ref (src_addr);
8407 dest = build_va_arg_indirect_ref (dest_addr);
8409 gimplify_assign (dest, src, pre_p);
8414 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8415 3, dest_addr, src_addr,
8416 size_int (cur_size));
8417 gimplify_and_add (copy, pre_p);
8419 prev_size += cur_size;
8425 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8426 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8427 gimplify_assign (gpr, t, pre_p);
8432 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8433 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8434 gimplify_assign (fpr, t, pre_p);
8437 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8439 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8442 /* ... otherwise out of the overflow area. */
8444 /* When we align a parameter on the stack for the caller, if its
8445    alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT, it will be aligned
8446    at MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here with the
8447    caller.  */
8448 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8449 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8450 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8452 /* Care for on-stack alignment if needed. */
8453 if (arg_boundary <= 64 || size == 0)
8457 HOST_WIDE_INT align = arg_boundary / 8;
8458 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8459 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8460 build_int_cst (TREE_TYPE (t), -align));
8463 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8464 gimplify_assign (addr, t, pre_p);
8466 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8467 gimplify_assign (unshare_expr (ovf), t, pre_p);
8470 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8472 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8473 addr = fold_convert (ptrtype, addr);
8476 addr = build_va_arg_indirect_ref (addr);
8477 return build_va_arg_indirect_ref (addr);
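/* For illustration, the register-fit test gimplified above: with
   X86_64_REGPARM_MAX == 6 and an argument needing two GP registers
   (needed_intregs == 2), the threshold is (6 - 2 + 1) * 8 == 40, so we
   branch to lab_false (the overflow path) once gp_offset >= 40, i.e.
   as soon as fewer than two of the six 8-byte GP slots remain.  */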
8480 /* Return true if OPNUM's MEM should be matched
8481 in movabs* patterns. */
8484 ix86_check_movabs (rtx insn, int opnum)
8488 set = PATTERN (insn);
8489 if (GET_CODE (set) == PARALLEL)
8490 set = XVECEXP (set, 0, 0);
8491 gcc_assert (GET_CODE (set) == SET);
8492 mem = XEXP (set, opnum);
8493 while (GET_CODE (mem) == SUBREG)
8494 mem = SUBREG_REG (mem);
8495 gcc_assert (MEM_P (mem));
8496 return volatile_ok || !MEM_VOLATILE_P (mem);
8499 /* Initialize the table of extra 80387 mathematical constants. */
8502 init_ext_80387_constants (void)
8504 static const char * cst[5] =
8506 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8507 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8508 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8509 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8510 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8514 for (i = 0; i < 5; i++)
8516 real_from_string (&ext_80387_constants_table[i], cst[i]);
8517 /* Ensure each constant is rounded to XFmode precision. */
8518 real_convert (&ext_80387_constants_table[i],
8519 XFmode, &ext_80387_constants_table[i]);
8522 ext_80387_constants_init = 1;
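/* The five strings above are simply decimal expansions of log10(2),
   ln(2), log2(e), log2(10) and pi -- the exact values pushed by the
   x87 fldlg2, fldln2, fldl2e, fldl2t and fldpi instructions.  */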
8525 /* Return non-zero if the constant is something that
8526 can be loaded with a special instruction. */
8529 standard_80387_constant_p (rtx x)
8531 enum machine_mode mode = GET_MODE (x);
8535 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8538 if (x == CONST0_RTX (mode))
8540 if (x == CONST1_RTX (mode))
8543 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8545 /* For XFmode constants, try to find a special 80387 instruction when
8546 optimizing for size or on those CPUs that benefit from them. */
8548 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8552 if (! ext_80387_constants_init)
8553 init_ext_80387_constants ();
8555 for (i = 0; i < 5; i++)
8556 if (real_identical (&r, &ext_80387_constants_table[i]))
8560 /* A load of the constant -0.0 or -1.0 will be split into an
8561    fldz;fchs or fld1;fchs sequence.  */
8562 if (real_isnegzero (&r))
8564 if (real_identical (&r, &dconstm1))
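/* For reference, the return values of this predicate encode the load
   strategy: 1 for fldz, 2 for fld1, 3..7 for the
   ext_80387_constants_table entries above (fldlg2, fldln2, fldl2e,
   fldl2t, fldpi), and 8/9 for -0.0/-1.0, which are loaded as the
   fldz;fchs / fld1;fchs sequences just mentioned.  */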
8570 /* Return the opcode of the special instruction to be used to load
8571    the constant X.  */
8574 standard_80387_constant_opcode (rtx x)
8576 switch (standard_80387_constant_p (x))
8600 /* Return the CONST_DOUBLE representing the 80387 constant that is
8601 loaded by the specified special instruction. The argument IDX
8602 matches the return value from standard_80387_constant_p. */
8605 standard_80387_constant_rtx (int idx)
8609 if (! ext_80387_constants_init)
8610 init_ext_80387_constants ();
8626 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8630 /* Return 1 if X is all 0s and 2 if X is all 1s
8631    in a supported SSE/AVX vector mode.  */
8634 standard_sse_constant_p (rtx x)
8636 enum machine_mode mode = GET_MODE (x);
8638 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8640 if (vector_all_ones_operand (x, mode))
8662 /* Return the opcode of the special instruction to be used to load
8663    the constant X.  */
8666 standard_sse_constant_opcode (rtx insn, rtx x)
8668 switch (standard_sse_constant_p (x))
8671 switch (get_attr_mode (insn))
8674 return "%vpxor\t%0, %d0";
8676 return "%vxorpd\t%0, %d0";
8678 return "%vxorps\t%0, %d0";
8681 return "vpxor\t%x0, %x0, %x0";
8683 return "vxorpd\t%x0, %x0, %x0";
8685 return "vxorps\t%x0, %x0, %x0";
8693 return "vpcmpeqd\t%0, %0, %0";
8695 return "pcmpeqd\t%0, %0";
8703 /* Return true if OP contains a symbol reference.  */
8706 symbolic_reference_mentioned_p (rtx op)
8711 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8714 fmt = GET_RTX_FORMAT (GET_CODE (op));
8715 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8721 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8722 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8726 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8733 /* Return true if it is appropriate to emit `ret' instructions in the
8734 body of a function. Do this only if the epilogue is simple, needing a
8735 couple of insns. Prior to reloading, we can't tell how many registers
8736 must be saved, so return false then. Return false if there is no frame
8737 marker to de-allocate. */
8740 ix86_can_use_return_insn_p (void)
8742 struct ix86_frame frame;
8744 if (! reload_completed || frame_pointer_needed)
8747 /* Don't allow more than 32k pop, since that's all we can do
8748 with one instruction. */
8749 if (crtl->args.pops_args && crtl->args.size >= 32768)
8752 ix86_compute_frame_layout (&frame);
8753 return (frame.stack_pointer_offset == UNITS_PER_WORD
8754 && (frame.nregs + frame.nsseregs) == 0);
8757 /* Value should be nonzero if functions must have frame pointers.
8758 Zero means the frame pointer need not be set up (and parms may
8759 be accessed via the stack pointer) in functions that seem suitable. */
8762 ix86_frame_pointer_required (void)
8764 /* If we accessed previous frames, then the generated code expects
8765 to be able to access the saved ebp value in our frame. */
8766 if (cfun->machine->accesses_prev_frame)
8769 /* Several x86 OSes need a frame pointer for other reasons,
8770    usually pertaining to setjmp.  */
8771 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8774 /* For older 32-bit runtimes, setjmp requires a valid frame pointer.  */
8775 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8778 /* Win64 SEH: very large frames need a frame pointer, as the maximum
8779    stack allocation is 4GB.  */
8780 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
8783 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8784 turns off the frame pointer by default. Turn it back on now if
8785 we've not got a leaf function. */
8786 if (TARGET_OMIT_LEAF_FRAME_POINTER
8788 || ix86_current_function_calls_tls_descriptor))
8791 if (crtl->profile && !flag_fentry)
8797 /* Record that the current function accesses previous call frames. */
8800 ix86_setup_frame_addresses (void)
8802 cfun->machine->accesses_prev_frame = 1;
8805 #ifndef USE_HIDDEN_LINKONCE
8806 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8807 # define USE_HIDDEN_LINKONCE 1
8809 # define USE_HIDDEN_LINKONCE 0
8813 static int pic_labels_used;
8815 /* Fills in the label name that should be used for a pc thunk for
8816 the given register. */
8819 get_pc_thunk_name (char name[32], unsigned int regno)
8821 gcc_assert (!TARGET_64BIT);
8823 if (USE_HIDDEN_LINKONCE)
8824 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8826 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8830 /* This function generates code for -fpic that loads %ebx with
8831 the return address of the caller and then returns. */
8834 ix86_code_end (void)
8839 for (regno = AX_REG; regno <= SP_REG; regno++)
8844 if (!(pic_labels_used & (1 << regno)))
8847 get_pc_thunk_name (name, regno);
8849 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8850 get_identifier (name),
8851 build_function_type_list (void_type_node, NULL_TREE));
8852 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8853 NULL_TREE, void_type_node);
8854 TREE_PUBLIC (decl) = 1;
8855 TREE_STATIC (decl) = 1;
8856 DECL_IGNORED_P (decl) = 1;
8861 switch_to_section (darwin_sections[text_coal_section]);
8862 fputs ("\t.weak_definition\t", asm_out_file);
8863 assemble_name (asm_out_file, name);
8864 fputs ("\n\t.private_extern\t", asm_out_file);
8865 assemble_name (asm_out_file, name);
8866 putc ('\n', asm_out_file);
8867 ASM_OUTPUT_LABEL (asm_out_file, name);
8868 DECL_WEAK (decl) = 1;
8872 if (USE_HIDDEN_LINKONCE)
8874 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8876 targetm.asm_out.unique_section (decl, 0);
8877 switch_to_section (get_named_section (decl, NULL, 0));
8879 targetm.asm_out.globalize_label (asm_out_file, name);
8880 fputs ("\t.hidden\t", asm_out_file);
8881 assemble_name (asm_out_file, name);
8882 putc ('\n', asm_out_file);
8883 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8887 switch_to_section (text_section);
8888 ASM_OUTPUT_LABEL (asm_out_file, name);
8891 DECL_INITIAL (decl) = make_node (BLOCK);
8892 current_function_decl = decl;
8893 init_function_start (decl);
8894 first_function_block_is_cold = false;
8895 /* Make sure unwind info is emitted for the thunk if needed. */
8896 final_start_function (emit_barrier (), asm_out_file, 1);
8898 /* Pad stack IP move with 4 instructions (two NOPs count
8899 as one instruction). */
8900 if (TARGET_PAD_SHORT_FUNCTION)
8905 fputs ("\tnop\n", asm_out_file);
8908 xops[0] = gen_rtx_REG (Pmode, regno);
8909 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8910 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8911 fputs ("\tret\n", asm_out_file);
8912 final_end_function ();
8913 init_insn_lengths ();
8914 free_after_compilation (cfun);
8916 current_function_decl = NULL;
8919 if (flag_split_stack)
8920 file_end_indicate_split_stack ();
8923 /* Emit code for the SET_GOT patterns. */
8926 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8932 if (TARGET_VXWORKS_RTP && flag_pic)
8934 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8935 xops[2] = gen_rtx_MEM (Pmode,
8936 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8937 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8939 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8940 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8941 an unadorned address. */
8942 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8943 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8944 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8948 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8952 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8954 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8957 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8958 is what will be referenced by the Mach-O PIC subsystem. */
8960 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8963 targetm.asm_out.internal_label (asm_out_file, "L",
8964 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8969 get_pc_thunk_name (name, REGNO (dest));
8970 pic_labels_used |= 1 << REGNO (dest);
8972 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8973 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8974 output_asm_insn ("call\t%X2", xops);
8975 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8976 is what will be referenced by the Mach-O PIC subsystem. */
8979 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8981 targetm.asm_out.internal_label (asm_out_file, "L",
8982 CODE_LABEL_NUMBER (label));
8987 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
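/* On a typical ia32 ELF target using the pc thunk, the sequence printed
   by this function for %ebx comes out roughly as:

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
   */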
8992 /* Generate a "push" pattern for input ARG.  */
8997 struct machine_function *m = cfun->machine;
8999 if (m->fs.cfa_reg == stack_pointer_rtx)
9000 m->fs.cfa_offset += UNITS_PER_WORD;
9001 m->fs.sp_offset += UNITS_PER_WORD;
9003 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9004 arg = gen_rtx_REG (word_mode, REGNO (arg));
9006 return gen_rtx_SET (VOIDmode,
9007 gen_rtx_MEM (word_mode,
9008 gen_rtx_PRE_DEC (Pmode,
9009 stack_pointer_rtx)),
9010 		      arg);
9013 /* Generate a "pop" pattern for input ARG.  */
9018 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9019 arg = gen_rtx_REG (word_mode, REGNO (arg));
9021 return gen_rtx_SET (VOIDmode,
9022 		      arg,
9023 gen_rtx_MEM (word_mode,
9024 gen_rtx_POST_INC (Pmode,
9025 stack_pointer_rtx)));
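/* For reference, on x86-64 the two helpers above produce RTL of the
   shape:

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ...))    ;; gen_push
     (set (reg:DI ...) (mem:DI (post_inc:DI (reg:DI sp))))   ;; gen_pop
   */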
9028 /* Return >= 0 if there is an unused call-clobbered register available
9029 for the entire function. */
9032 ix86_select_alt_pic_regnum (void)
9036 && !ix86_current_function_calls_tls_descriptor)
9039 /* Can't use the same register for both PIC and DRAP. */
9041 drap = REGNO (crtl->drap_reg);
9044 for (i = 2; i >= 0; --i)
9045 if (i != drap && !df_regs_ever_live_p (i))
9049 return INVALID_REGNUM;
9052 /* Return TRUE if we need to save REGNO. */
9055 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9057 if (pic_offset_table_rtx
9058 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9059 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9061 || crtl->calls_eh_return
9062 || crtl->uses_const_pool))
9063 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9065 if (crtl->calls_eh_return && maybe_eh_return)
9070 unsigned test = EH_RETURN_DATA_REGNO (i);
9071 if (test == INVALID_REGNUM)
9078 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9081 return (df_regs_ever_live_p (regno)
9082 && !call_used_regs[regno]
9083 && !fixed_regs[regno]
9084 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9087 /* Return the number of saved general purpose registers.  */
9090 ix86_nsaved_regs (void)
9095 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9096 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9101 /* Return the number of saved SSE registers.  */
9104 ix86_nsaved_sseregs (void)
9109 if (!TARGET_64BIT_MS_ABI)
9111 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9112 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9117 /* Given FROM and TO register numbers, say whether this elimination is
9118 allowed. If stack alignment is needed, we can only replace argument
9119 pointer with hard frame pointer, or replace frame pointer with stack
9120 pointer. Otherwise, frame pointer elimination is automatically
9121 handled and all other eliminations are valid. */
9124 ix86_can_eliminate (const int from, const int to)
9126 if (stack_realign_fp)
9127 return ((from == ARG_POINTER_REGNUM
9128 && to == HARD_FRAME_POINTER_REGNUM)
9129 || (from == FRAME_POINTER_REGNUM
9130 && to == STACK_POINTER_REGNUM));
9132 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9135 /* Return the offset between two registers, one to be eliminated, and the other
9136 its replacement, at the start of a routine. */
9139 ix86_initial_elimination_offset (int from, int to)
9141 struct ix86_frame frame;
9142 ix86_compute_frame_layout (&frame);
9144 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9145 return frame.hard_frame_pointer_offset;
9146 else if (from == FRAME_POINTER_REGNUM
9147 && to == HARD_FRAME_POINTER_REGNUM)
9148 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9151 gcc_assert (to == STACK_POINTER_REGNUM);
9153 if (from == ARG_POINTER_REGNUM)
9154 return frame.stack_pointer_offset;
9156 gcc_assert (from == FRAME_POINTER_REGNUM);
9157 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9161 /* In a dynamically-aligned function, we can't know the offset from
9162 stack pointer to frame pointer, so we must ensure that setjmp
9163 eliminates fp against the hard fp (%ebp) rather than trying to
9164 index from %esp up to the top of the frame across a gap that is
9165 of unknown (at compile-time) size. */
9167 ix86_builtin_setjmp_frame_value (void)
9169 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9172 /* When using -fsplit-stack, the allocation routines set a field in
9173 the TCB to the bottom of the stack plus this much space, measured
9174    in bytes.  */
9176 #define SPLIT_STACK_AVAILABLE 256
9178 /* Fill the ix86_frame structure FRAME with the frame layout of the currently compiled function.  */
9181 ix86_compute_frame_layout (struct ix86_frame *frame)
9183 unsigned HOST_WIDE_INT stack_alignment_needed;
9184 HOST_WIDE_INT offset;
9185 unsigned HOST_WIDE_INT preferred_alignment;
9186 HOST_WIDE_INT size = get_frame_size ();
9187 HOST_WIDE_INT to_allocate;
9189 frame->nregs = ix86_nsaved_regs ();
9190 frame->nsseregs = ix86_nsaved_sseregs ();
9192 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9193 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9195 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
9196    except in function prologues and leaf functions.  */
9197 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9198 && (!crtl->is_leaf || cfun->calls_alloca != 0
9199 || ix86_current_function_calls_tls_descriptor))
9201 preferred_alignment = 16;
9202 stack_alignment_needed = 16;
9203 crtl->preferred_stack_boundary = 128;
9204 crtl->stack_alignment_needed = 128;
9207 gcc_assert (!size || stack_alignment_needed);
9208 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9209 gcc_assert (preferred_alignment <= stack_alignment_needed);
9211 /* For SEH we have to limit the amount of code movement into the prologue.
9212 At present we do this via a BLOCKAGE, at which point there's very little
9213 scheduling that can be done, which means that there's very little point
9214 in doing anything except PUSHs. */
9216 cfun->machine->use_fast_prologue_epilogue = false;
9218 /* During reload iterations the number of registers saved can change.
9219    Recompute the value as needed.  Do not recompute when the number of
9220    registers didn't change, as reload makes multiple calls to this function
9221    and does not expect the decision to change within a single iteration.  */
9222 else if (!optimize_function_for_size_p (cfun)
9223 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9225 int count = frame->nregs;
9226 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9228 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9230 /* The fast prologue uses move instead of push to save registers.  This
9231    is significantly longer, but also executes faster, as modern hardware
9232    can execute the moves in parallel but can't do that for push/pop.
9234    Be careful about choosing which prologue to emit: when the function
9235    takes many instructions to execute, we may use the slow version, as
9236    well as when the function is known to be outside a hot spot (this is
9237    known only with feedback).  Weight the size of the function by the
9238    number of registers to save, as it is cheap to use one or two push
9239    instructions but very slow to use many of them.  */
9241 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9242 if (node->frequency < NODE_FREQUENCY_NORMAL
9243 || (flag_branch_probabilities
9244 && node->frequency < NODE_FREQUENCY_HOT))
9245 cfun->machine->use_fast_prologue_epilogue = false;
9247 cfun->machine->use_fast_prologue_epilogue
9248 = !expensive_function_p (count);
9251 frame->save_regs_using_mov
9252 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9253 /* If static stack checking is enabled and done with probes,
9254 the registers need to be saved before allocating the frame. */
9255 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9257 /* Skip return address. */
9258 offset = UNITS_PER_WORD;
9260 /* Skip pushed static chain. */
9261 if (ix86_static_chain_on_stack)
9262 offset += UNITS_PER_WORD;
9264 /* Skip saved base pointer. */
9265 if (frame_pointer_needed)
9266 offset += UNITS_PER_WORD;
9267 frame->hfp_save_offset = offset;
9269 /* The traditional frame pointer location is at the top of the frame. */
9270 frame->hard_frame_pointer_offset = offset;
9272 /* Register save area */
9273 offset += frame->nregs * UNITS_PER_WORD;
9274 frame->reg_save_offset = offset;
9276 /* On the SEH target, registers are pushed just before the frame pointer
9277    location.  */
9278   if (TARGET_SEH)
9279     frame->hard_frame_pointer_offset = offset;
9281 /* Align and set SSE register save area. */
9282 if (frame->nsseregs)
9284 /* The only ABI that has saved SSE registers (Win64) also has a
9285 16-byte aligned default stack, and thus we don't need to be
9286 within the re-aligned local stack frame to save them. */
9287 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9288 offset = (offset + 16 - 1) & -16;
9289 offset += frame->nsseregs * 16;
9291 frame->sse_reg_save_offset = offset;
9293 /* The re-aligned stack starts here. Values before this point are not
9294 directly comparable with values below this point. In order to make
9295 sure that no value happens to be the same before and after, force
9296 the alignment computation below to add a non-zero value. */
9297 if (stack_realign_fp)
9298 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9301 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9302 offset += frame->va_arg_size;
9304 /* Align start of frame for local function. */
9305 if (stack_realign_fp
9306 || offset != frame->sse_reg_save_offset
9309 || cfun->calls_alloca
9310 || ix86_current_function_calls_tls_descriptor)
9311 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
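/* The rounding idiom used throughout this function, for power-of-two
   alignments:  (offset + align - 1) & -align  rounds OFFSET up to the
   next multiple of ALIGN.  E.g. offset == 20, align == 16 gives
   (20 + 15) & -16 == 35 & ~15 == 32.  */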
9313 /* Frame pointer points here. */
9314 frame->frame_pointer_offset = offset;
9318 /* Add the outgoing arguments area.  It can be skipped if we eliminated
9319    all the function calls as dead code.
9320    Skipping is however impossible when the function calls alloca, as the
9321    alloca expander assumes that the last crtl->outgoing_args_size bytes
9322    of the stack frame are unused.  */
9323 if (ACCUMULATE_OUTGOING_ARGS
9324 && (!crtl->is_leaf || cfun->calls_alloca
9325 || ix86_current_function_calls_tls_descriptor))
9327 offset += crtl->outgoing_args_size;
9328 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9331 frame->outgoing_arguments_size = 0;
9333 /* Align the stack boundary.  Only needed if we're calling another
9334    function or using alloca.  */
9335 if (!crtl->is_leaf || cfun->calls_alloca
9336 || ix86_current_function_calls_tls_descriptor)
9337 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9339 /* We've reached end of stack frame. */
9340 frame->stack_pointer_offset = offset;
9342 /* Size the prologue needs to allocate.  */
9343 to_allocate = offset - frame->sse_reg_save_offset;
9345 if ((!to_allocate && frame->nregs <= 1)
9346 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9347 frame->save_regs_using_mov = false;
9349 if (ix86_using_red_zone ()
9350 && crtl->sp_is_unchanging
9352 && !ix86_current_function_calls_tls_descriptor)
9354 frame->red_zone_size = to_allocate;
9355 if (frame->save_regs_using_mov)
9356 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9357 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9358 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9361 frame->red_zone_size = 0;
9362 frame->stack_pointer_offset -= frame->red_zone_size;
9364 /* The SEH frame pointer location is near the bottom of the frame.
9365 This is enforced by the fact that the difference between the
9366 stack pointer and the frame pointer is limited to 240 bytes in
9367 the unwind data structure. */
9372 /* If we can leave the frame pointer where it is, do so.  This also
9373    serves as the establisher frame for __builtin_frame_address (0).  */
9374 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9375 if (diff <= SEH_MAX_FRAME_SIZE
9376 && (diff > 240 || (diff & 15) != 0)
9377 && !crtl->accesses_prior_frames)
9379 /* Ideally we'd determine what portion of the local stack frame
9380 (within the constraint of the lowest 240) is most heavily used.
9381 But without that complication, simply bias the frame pointer
9382 by 128 bytes so as to maximize the amount of the local stack
9383 frame that is addressable with 8-bit offsets. */
9384 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9389 /* This is semi-inlined memory_address_length, but simplified
9390 since we know that we're always dealing with reg+offset, and
9391 to avoid having to create and discard all that rtl. */
9394 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9400 /* EBP and R13 cannot be encoded without an offset. */
9401 len = (regno == BP_REG || regno == R13_REG);
9403 else if (IN_RANGE (offset, -128, 127))
9406 /* ESP and R12 must be encoded with a SIB byte. */
9407 if (regno == SP_REG || regno == R12_REG)
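/* Examples of the resulting encoding lengths: (%rax) needs no
   displacement byte (len 0); (%rbp) and (%r13) always need at least a
   disp8; any offset in [-128, 127] costs one extra byte and larger
   offsets cost four; and (%rsp)/(%r12) additionally pay one byte for
   the mandatory SIB.  */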
9413 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9414 The valid base registers are taken from CFUN->MACHINE->FS. */
9417 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9419 const struct machine_function *m = cfun->machine;
9420 rtx base_reg = NULL;
9421 HOST_WIDE_INT base_offset = 0;
9423 if (m->use_fast_prologue_epilogue)
9425 /* Choose the base register most likely to allow the most scheduling
9426 opportunities. Generally FP is valid throughout the function,
9427 while DRAP must be reloaded within the epilogue. But choose either
9428 over the SP due to increased encoding size. */
9432 base_reg = hard_frame_pointer_rtx;
9433 base_offset = m->fs.fp_offset - cfa_offset;
9435 else if (m->fs.drap_valid)
9437 base_reg = crtl->drap_reg;
9438 base_offset = 0 - cfa_offset;
9440 else if (m->fs.sp_valid)
9442 base_reg = stack_pointer_rtx;
9443 base_offset = m->fs.sp_offset - cfa_offset;
9448 HOST_WIDE_INT toffset;
9451 /* Choose the base register with the smallest address encoding.
9452 With a tie, choose FP > DRAP > SP. */
9455 base_reg = stack_pointer_rtx;
9456 base_offset = m->fs.sp_offset - cfa_offset;
9457 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9459 if (m->fs.drap_valid)
9461 toffset = 0 - cfa_offset;
9462 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9465 base_reg = crtl->drap_reg;
9466 base_offset = toffset;
9472 toffset = m->fs.fp_offset - cfa_offset;
9473 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9476 base_reg = hard_frame_pointer_rtx;
9477 base_offset = toffset;
9482 gcc_assert (base_reg != NULL);
9484 return plus_constant (Pmode, base_reg, base_offset);
9487 /* Emit code to save registers in the prologue. */
9490 ix86_emit_save_regs (void)
9495 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9496 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9498 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9499 RTX_FRAME_RELATED_P (insn) = 1;
9503 /* Emit a single register save at CFA - CFA_OFFSET. */
9506 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9507 HOST_WIDE_INT cfa_offset)
9509 struct machine_function *m = cfun->machine;
9510 rtx reg = gen_rtx_REG (mode, regno);
9511 rtx mem, addr, base, insn;
9513 addr = choose_baseaddr (cfa_offset);
9514 mem = gen_frame_mem (mode, addr);
9516 /* For SSE saves, we need to indicate the 128-bit alignment. */
9517 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9519 insn = emit_move_insn (mem, reg);
9520 RTX_FRAME_RELATED_P (insn) = 1;
9523 if (GET_CODE (base) == PLUS)
9524 base = XEXP (base, 0);
9525 gcc_checking_assert (REG_P (base));
9527 /* When saving registers into a re-aligned local stack frame, avoid
9528 any tricky guessing by dwarf2out. */
9529 if (m->fs.realigned)
9531 gcc_checking_assert (stack_realign_drap);
9533 if (regno == REGNO (crtl->drap_reg))
9535 /* A bit of a hack. We force the DRAP register to be saved in
9536 the re-aligned stack frame, which provides us with a copy
9537 of the CFA that will last past the prologue. Install it. */
9538 gcc_checking_assert (cfun->machine->fs.fp_valid);
9539 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9540 cfun->machine->fs.fp_offset - cfa_offset);
9541 mem = gen_rtx_MEM (mode, addr);
9542 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9546 /* The frame pointer is a stable reference within the
9547 aligned frame. Use it. */
9548 gcc_checking_assert (cfun->machine->fs.fp_valid);
9549 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9550 cfun->machine->fs.fp_offset - cfa_offset);
9551 mem = gen_rtx_MEM (mode, addr);
9552 add_reg_note (insn, REG_CFA_EXPRESSION,
9553 gen_rtx_SET (VOIDmode, mem, reg));
9557 /* The memory may not be relative to the current CFA register,
9558 which means that we may need to generate a new pattern for
9559 use by the unwind info. */
9560 else if (base != m->fs.cfa_reg)
9562 addr = plus_constant (Pmode, m->fs.cfa_reg,
9563 m->fs.cfa_offset - cfa_offset);
9564 mem = gen_rtx_MEM (mode, addr);
9565 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9569 /* Emit code to save registers using MOV insns.
9570 First register is stored at CFA - CFA_OFFSET. */
9572 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9576 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9577 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9579 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9580 cfa_offset -= UNITS_PER_WORD;
9584 /* Emit code to save SSE registers using MOV insns.
9585 First register is stored at CFA - CFA_OFFSET. */
9587 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9591 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9592 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9594 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9599 static GTY(()) rtx queued_cfa_restores;
9601 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next
9602    stack manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
9603    Don't add the note if the previously saved value will be left untouched
9604    within the stack red zone until return, as unwinders can find the same
9605    value in the register and on the stack.  */
9608 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9610 if (!crtl->shrink_wrapped
9611 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9616 add_reg_note (insn, REG_CFA_RESTORE, reg);
9617 RTX_FRAME_RELATED_P (insn) = 1;
9621 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9624 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9627 ix86_add_queued_cfa_restore_notes (rtx insn)
9630 if (!queued_cfa_restores)
9632 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9634 XEXP (last, 1) = REG_NOTES (insn);
9635 REG_NOTES (insn) = queued_cfa_restores;
9636 queued_cfa_restores = NULL_RTX;
9637 RTX_FRAME_RELATED_P (insn) = 1;
9640 /* Expand prologue or epilogue stack adjustment.
9641 The pattern exists to put a dependency on all ebp-based memory accesses.
9642 STYLE should be negative if instructions should be marked as frame related,
9643 zero if the %r11 register is live and cannot be freely used, and
9644    positive otherwise.  */
9647 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9648 int style, bool set_cfa)
9650 struct machine_function *m = cfun->machine;
9652 bool add_frame_related_expr = false;
9654 if (Pmode == SImode)
9655 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9656 else if (x86_64_immediate_operand (offset, DImode))
9657 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9661 /* r11 is used by indirect sibcall return as well, set before the
9662 epilogue and used after the epilogue. */
9664 tmp = gen_rtx_REG (DImode, R11_REG);
9667 gcc_assert (src != hard_frame_pointer_rtx
9668 && dest != hard_frame_pointer_rtx);
9669 tmp = hard_frame_pointer_rtx;
9671 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9673 add_frame_related_expr = true;
9675 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9678 insn = emit_insn (insn);
9680 ix86_add_queued_cfa_restore_notes (insn);
9686 gcc_assert (m->fs.cfa_reg == src);
9687 m->fs.cfa_offset += INTVAL (offset);
9688 m->fs.cfa_reg = dest;
9690 r = gen_rtx_PLUS (Pmode, src, offset);
9691 r = gen_rtx_SET (VOIDmode, dest, r);
9692 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9693 RTX_FRAME_RELATED_P (insn) = 1;
9697 RTX_FRAME_RELATED_P (insn) = 1;
9698 if (add_frame_related_expr)
9700 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9701 r = gen_rtx_SET (VOIDmode, dest, r);
9702 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9706 if (dest == stack_pointer_rtx)
9708 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9709 bool valid = m->fs.sp_valid;
9711 if (src == hard_frame_pointer_rtx)
9713 valid = m->fs.fp_valid;
9714 ooffset = m->fs.fp_offset;
9716 else if (src == crtl->drap_reg)
9718 valid = m->fs.drap_valid;
9723 /* Else there are two possibilities: SP itself, which we set
9724    up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9725    taken care of by hand along the eh_return path.  */
9726 gcc_checking_assert (src == stack_pointer_rtx
9727 || offset == const0_rtx);
9730 m->fs.sp_offset = ooffset - INTVAL (offset);
9731 m->fs.sp_valid = valid;
9735 /* Find an available register to be used as a dynamic realign argument
9736    pointer register.  Such a register will be written in the prologue and
9737    used at the beginning of the body, so it must not be
9738 	1. a parameter passing register.
9739 	2. the GOT pointer.
9740    We reuse the static-chain register if it is available.  Otherwise, we
9741    use DI for i386 and R13 for x86-64.  We chose R13 since it has a
9742    longer instruction size.
9744    Return: the regno of the chosen register.  */
9747 find_drap_reg (void)
9749 tree decl = cfun->decl;
9753 /* Use R13 for a nested function or a function that needs a static
9754    chain.  Since a function with a tail call may use any caller-saved
9755    registers in the epilogue, DRAP must not use a caller-saved
9756    register in such a case.  */
9757 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9764 /* Use DI for a nested function or a function that needs a static
9765    chain.  Since a function with a tail call may use any caller-saved
9766    registers in the epilogue, DRAP must not use a caller-saved
9767    register in such a case.  */
9768 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9771 /* Reuse the static chain register if it isn't used for parameter
9772    passing.  */
9773 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9775 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9776 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9783 /* Return minimum incoming stack alignment. */
9786 ix86_minimum_incoming_stack_boundary (bool sibcall)
9788 unsigned int incoming_stack_boundary;
9790 /* Prefer the one specified at command line. */
9791 if (ix86_user_incoming_stack_boundary)
9792 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9793 /* In 32bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9794    if -mstackrealign is used, this isn't a sibcall check, and the
9795    estimated stack alignment is 128bit.  */
9798 && ix86_force_align_arg_pointer
9799 && crtl->stack_alignment_estimated == 128)
9800 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9802 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9804 /* Incoming stack alignment can be changed on individual functions
9805    via the force_align_arg_pointer attribute.  We use the smallest
9806    incoming stack boundary.  */
9807 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9808 && lookup_attribute (ix86_force_align_arg_pointer_string,
9809 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9810 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9812 /* The incoming stack frame has to be aligned at least at
9813 parm_stack_boundary. */
9814 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9815 incoming_stack_boundary = crtl->parm_stack_boundary;
9817 /* The stack at the entrance of main is aligned by the runtime.  We
9818    use the smallest incoming stack boundary.  */
9819 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9820 && DECL_NAME (current_function_decl)
9821 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9822 && DECL_FILE_SCOPE_P (current_function_decl))
9823 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9825 return incoming_stack_boundary;
9828 /* Update incoming stack boundary and estimated stack alignment. */
9831 ix86_update_stack_boundary (void)
9833 ix86_incoming_stack_boundary
9834 = ix86_minimum_incoming_stack_boundary (false);
9836 /* x86_64 varargs needs 16byte stack alignment for the register save
9837    area.  */
9838   if (TARGET_64BIT
9839       && cfun->stdarg
9840 && crtl->stack_alignment_estimated < 128)
9841 crtl->stack_alignment_estimated = 128;
9844 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9845 needed or an rtx for DRAP otherwise. */
9848 ix86_get_drap_rtx (void)
9850 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9851 crtl->need_drap = true;
9853 if (stack_realign_drap)
9855 /* Assign DRAP to vDRAP and return vDRAP.  */
9856 unsigned int regno = find_drap_reg ();
9861 arg_ptr = gen_rtx_REG (Pmode, regno);
9862 crtl->drap_reg = arg_ptr;
9865 drap_vreg = copy_to_reg (arg_ptr);
9869 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9872 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9873 RTX_FRAME_RELATED_P (insn) = 1;
9881 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9884 ix86_internal_arg_pointer (void)
9886 return virtual_incoming_args_rtx;
9889 struct scratch_reg {
9894 /* Return a short-lived scratch register for use on function entry.
9895 In 32-bit mode, it is valid only after the registers are saved
9896 in the prologue. This register must be released by means of
9897 release_scratch_register_on_entry once it is dead. */
9900 get_scratch_register_on_entry (struct scratch_reg *sr)
9908 /* We always use R11 in 64-bit mode. */
9913 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9915 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9916 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9917 int regparm = ix86_function_regparm (fntype, decl);
9919 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9921 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9922 for the static chain register. */
9923 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9924 && drap_regno != AX_REG)
9926 else if (regparm < 2 && drap_regno != DX_REG)
9928 /* ecx is the static chain register. */
9929 else if (regparm < 3 && !fastcall_p && !static_chain_p
9930 && drap_regno != CX_REG)
9932 else if (ix86_save_reg (BX_REG, true))
9934 /* esi is the static chain register. */
9935 else if (!(regparm == 3 && static_chain_p)
9936 && ix86_save_reg (SI_REG, true))
9938 else if (ix86_save_reg (DI_REG, true))
9942 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9947 sr->reg = gen_rtx_REG (Pmode, regno);
9950 rtx insn = emit_insn (gen_push (sr->reg));
9951 RTX_FRAME_RELATED_P (insn) = 1;
9955 /* Release a scratch register obtained from the preceding function. */
9958 release_scratch_register_on_entry (struct scratch_reg *sr)
9962 rtx x, insn = emit_insn (gen_pop (sr->reg));
9964 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9965 RTX_FRAME_RELATED_P (insn) = 1;
9966 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9967 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9968 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9972 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
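/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12, this probes
   the stack every 4096 bytes, i.e. once per typical page.  */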
9974 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9977 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9979 /* We skip the probe for the first interval + a small dope of 4 words and
9980 probe that many bytes past the specified size to maintain a protection
9981 area at the bottom of the stack.  */
9982 const int dope = 4 * UNITS_PER_WORD;
9983 rtx size_rtx = GEN_INT (size), last;
9985 /* See if we have a constant small number of probes to generate. If so,
9986 that's the easy case. The run-time loop is made up of 11 insns in the
9987 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9988 for n # of intervals. */
9989 if (size <= 5 * PROBE_INTERVAL)
9991 HOST_WIDE_INT i, adjust;
9992 bool first_probe = true;
9994 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9995 values of N from 1 until it exceeds SIZE. If only one probe is
9996 needed, this will not generate any code. Then adjust and probe
9997 to PROBE_INTERVAL + SIZE. */
9998 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10002 adjust = 2 * PROBE_INTERVAL + dope;
10003 first_probe = false;
10006 adjust = PROBE_INTERVAL;
10008 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10009 plus_constant (Pmode, stack_pointer_rtx,
10011 emit_stack_probe (stack_pointer_rtx);
10015 adjust = size + PROBE_INTERVAL + dope;
10017 adjust = size + PROBE_INTERVAL - i;
10019 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10020 plus_constant (Pmode, stack_pointer_rtx,
10022 emit_stack_probe (stack_pointer_rtx);
10024 /* Adjust back to account for the additional first interval. */
10025 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10026 plus_constant (Pmode, stack_pointer_rtx,
10027 PROBE_INTERVAL + dope)));
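/* A worked trace of the unrolled path above, assuming 64-bit code
   (dope == 32), PROBE_INTERVAL == 4096 and size == 10000:

     sub	$8224, %rsp	;; 2*4096 + 32: first interval plus dope
     probe	(%rsp)
     sub	$4096, %rsp
     probe	(%rsp)
     sub	$1808, %rsp	;; 10000 + 4096 - 12288
     probe	(%rsp)
     add	$4128, %rsp	;; give back PROBE_INTERVAL + dope

   for a net adjustment of exactly 10000 bytes.  */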
10030 /* Otherwise, do the same as above, but in a loop. Note that we must be
10031 extra careful with variables wrapping around because we might be at
10032 the very top (or the very bottom) of the address space and we have
10033 to be able to handle this case properly; in particular, we use an
10034 equality test for the loop condition. */
10037 HOST_WIDE_INT rounded_size;
10038 struct scratch_reg sr;
10040 get_scratch_register_on_entry (&sr);
10043 /* Step 1: round SIZE to the previous multiple of the interval. */
10045 rounded_size = size & -PROBE_INTERVAL;
10048 /* Step 2: compute initial and final value of the loop counter. */
10050 /* SP = SP_0 + PROBE_INTERVAL. */
10051 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10052 plus_constant (Pmode, stack_pointer_rtx,
10053 - (PROBE_INTERVAL + dope))));
10055 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10056 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10057 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10058 gen_rtx_PLUS (Pmode, sr.reg,
10059 stack_pointer_rtx)));
10062 /* Step 3: the loop
10064 while (SP != LAST_ADDR)
10066 SP = SP + PROBE_INTERVAL
10070 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10071 values of N from 1 until it is equal to ROUNDED_SIZE. */
10073 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10076 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10077 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10079 if (size != rounded_size)
10081 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10082 plus_constant (Pmode, stack_pointer_rtx,
10083 rounded_size - size)));
10084 emit_stack_probe (stack_pointer_rtx);
10087 /* Adjust back to account for the additional first interval. */
10088 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10089 plus_constant (Pmode, stack_pointer_rtx,
10090 PROBE_INTERVAL + dope)));
10092 release_scratch_register_on_entry (&sr);
10095 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10097 /* Even if the stack pointer isn't the CFA register, we need to correctly
10098 describe the adjustments made to it, in particular differentiate the
10099 frame-related ones from the frame-unrelated ones. */
10102 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10103 XVECEXP (expr, 0, 0)
10104 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10105 plus_constant (Pmode, stack_pointer_rtx, -size));
10106 XVECEXP (expr, 0, 1)
10107 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10108 plus_constant (Pmode, stack_pointer_rtx,
10109 PROBE_INTERVAL + dope + size));
10110 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10111 RTX_FRAME_RELATED_P (last) = 1;
10113 cfun->machine->fs.sp_offset += size;
10116 /* Make sure nothing is scheduled before we are done. */
10117 emit_insn (gen_blockage ());
10120 /* Adjust the stack pointer up to REG while probing it. */
10123 output_adjust_stack_and_probe (rtx reg)
10125 static int labelno = 0;
10126 char loop_lab[32], end_lab[32];
10129 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10130 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10132 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10134 /* Jump to END_LAB if SP == LAST_ADDR. */
10135 xops[0] = stack_pointer_rtx;
10137 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10138 fputs ("\tje\t", asm_out_file);
10139 assemble_name_raw (asm_out_file, end_lab);
10140 fputc ('\n', asm_out_file);
10142 /* SP = SP + PROBE_INTERVAL. */
10143 xops[1] = GEN_INT (PROBE_INTERVAL);
10144 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10147 xops[1] = const0_rtx;
10148 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10150 fprintf (asm_out_file, "\tjmp\t");
10151 assemble_name_raw (asm_out_file, loop_lab);
10152 fputc ('\n', asm_out_file);
10154 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
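/* The loop emitted above reads roughly as follows, in AT&T syntax and
   assuming the scratch register is %r11 and PROBE_INTERVAL is 4096:

   .LPSRL0:
	cmpq	%r11, %rsp
	je	.LPSRE0
	subq	$4096, %rsp
	orq	$0, (%rsp)
	jmp	.LPSRL0
   .LPSRE0:
   */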
10159 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10160 inclusive. These are offsets from the current stack pointer. */
10163 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10165 /* See if we have a constant small number of probes to generate. If so,
10166 that's the easy case. The run-time loop is made up of 7 insns in the
10167 generic case while the compile-time loop is made up of n insns for n #
10168 of probes. */
10169 if (size <= 7 * PROBE_INTERVAL)
10173 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10174 it exceeds SIZE. If only one probe is needed, this will not
10175 generate any code. Then probe at FIRST + SIZE. */
10176 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10177 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10178 -(first + i)));
10180 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10181 -(first + size)));
10184 /* Otherwise, do the same as above, but in a loop. Note that we must be
10185 extra careful with variables wrapping around because we might be at
10186 the very top (or the very bottom) of the address space and we have
10187 to be able to handle this case properly; in particular, we use an
10188 equality test for the loop condition. */
10191 HOST_WIDE_INT rounded_size, last;
10192 struct scratch_reg sr;
10194 get_scratch_register_on_entry (&sr);
10197 /* Step 1: round SIZE to the previous multiple of the interval. */
10199 rounded_size = size & -PROBE_INTERVAL;
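/* Worked example (added for illustration): PROBE_INTERVAL is a power of
   two, so ANDing with its negation rounds down to a multiple of the
   interval.  Assuming a 4096-byte interval and size = 10000,
   rounded_size = 10000 & -4096 = 8192, i.e. two full intervals, leaving
   the residual 1808 bytes for the Step 4 probe below.  */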
10202 /* Step 2: compute initial and final value of the loop counter. */
10204 /* TEST_OFFSET = FIRST. */
10205 emit_move_insn (sr.reg, GEN_INT (-first));
10207 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10208 last = first + rounded_size;
10211 /* Step 3: the loop
10213 while (TEST_ADDR != LAST_ADDR)
10214 {
10215 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10216 probe at TEST_ADDR
10217 }
10219 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10220 until it is equal to ROUNDED_SIZE. */
10222 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10225 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10226 that SIZE is equal to ROUNDED_SIZE. */
10228 if (size != rounded_size)
10229 emit_stack_probe (plus_constant (Pmode,
10230 gen_rtx_PLUS (Pmode,
10231 stack_pointer_rtx,
10232 sr.reg),
10233 rounded_size - size));
10235 release_scratch_register_on_entry (&sr);
10238 /* Make sure nothing is scheduled before we are done. */
10239 emit_insn (gen_blockage ());
10242 /* Probe a range of stack addresses from REG to END, inclusive. These are
10243 offsets from the current stack pointer. */
10246 output_probe_stack_range (rtx reg, rtx end)
10248 static int labelno = 0;
10249 char loop_lab[32], end_lab[32];
10252 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10253 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10255 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10257 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10258 xops[0] = reg;
10259 xops[1] = end;
10260 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10261 fputs ("\tje\t", asm_out_file);
10262 assemble_name_raw (asm_out_file, end_lab);
10263 fputc ('\n', asm_out_file);
10265 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10266 xops[1] = GEN_INT (PROBE_INTERVAL);
10267 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10269 /* Probe at TEST_ADDR. */
10270 xops[0] = stack_pointer_rtx;
10271 xops[1] = reg;
10272 xops[2] = const0_rtx;
10273 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10275 fprintf (asm_out_file, "\tjmp\t");
10276 assemble_name_raw (asm_out_file, loop_lab);
10277 fputc ('\n', asm_out_file);
10279 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
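/* Illustrative sketch (an addition, assuming a 4 KiB interval): unlike the
   adjust-and-probe loop earlier, this one never moves SP; %rax stands in
   for the register holding the (negative) TEST_ADDR offset and %rdx for
   LAST_ADDR:

       .LPSRL1:
               cmpq    %rdx, %rax        # TEST_ADDR == LAST_ADDR?
               je      .LPSRE1
               subq    $4096, %rax       # advance TEST_ADDR one interval
               orl     $0, (%rsp,%rax)   # probe at SP + TEST_ADDR
               jmp     .LPSRL1
       .LPSRE1:  */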
10284 /* Finalize the stack_realign_needed flag, which guides generation of the
10285 prologue/epilogue in the correct form. */
10287 ix86_finalize_stack_realign_flags (void)
10289 /* Check if stack realignment is really needed after reload, and
10290 store the result in cfun. */
10291 unsigned int incoming_stack_boundary
10292 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10293 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10294 unsigned int stack_realign = (incoming_stack_boundary
10296 ? crtl->max_used_stack_slot_alignment
10297 : crtl->stack_alignment_needed));
10299 if (crtl->stack_realign_finalized)
10301 /* After stack_realign_needed is finalized, we can no longer
10302 change it. */
10303 gcc_assert (crtl->stack_realign_needed == stack_realign);
10307 /* If the only reason for frame_pointer_needed is that we conservatively
10308 assumed stack realignment might be needed, but in the end nothing that
10309 needed the stack alignment had been spilled, clear frame_pointer_needed
10310 and say we don't need stack realignment. */
10312 && !crtl->need_drap
10313 && frame_pointer_needed
10315 && flag_omit_frame_pointer
10316 && crtl->sp_is_unchanging
10317 && !ix86_current_function_calls_tls_descriptor
10318 && !crtl->accesses_prior_frames
10319 && !cfun->calls_alloca
10320 && !crtl->calls_eh_return
10321 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10322 && !ix86_frame_pointer_required ()
10323 && get_frame_size () == 0
10324 && ix86_nsaved_sseregs () == 0
10325 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10327 HARD_REG_SET set_up_by_prologue, prologue_used;
10330 CLEAR_HARD_REG_SET (prologue_used);
10331 CLEAR_HARD_REG_SET (set_up_by_prologue);
10332 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10333 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10334 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10335 HARD_FRAME_POINTER_REGNUM);
10339 FOR_BB_INSNS (bb, insn)
10340 if (NONDEBUG_INSN_P (insn)
10341 && requires_stack_frame_p (insn, prologue_used,
10342 set_up_by_prologue))
10344 crtl->stack_realign_needed = stack_realign;
10345 crtl->stack_realign_finalized = true;
10350 frame_pointer_needed = false;
10351 stack_realign = false;
10352 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10353 crtl->stack_alignment_needed = incoming_stack_boundary;
10354 crtl->stack_alignment_estimated = incoming_stack_boundary;
10355 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10356 crtl->preferred_stack_boundary = incoming_stack_boundary;
10357 df_finish_pass (true);
10358 df_scan_alloc (NULL);
10360 df_compute_regs_ever_live (true);
10364 crtl->stack_realign_needed = stack_realign;
10365 crtl->stack_realign_finalized = true;
10368 /* Expand the prologue into a bunch of separate insns. */
10371 ix86_expand_prologue (void)
10373 struct machine_function *m = cfun->machine;
10376 struct ix86_frame frame;
10377 HOST_WIDE_INT allocate;
10378 bool int_registers_saved;
10379 bool sse_registers_saved;
10381 ix86_finalize_stack_realign_flags ();
10383 /* DRAP should not coexist with stack_realign_fp */
10384 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10386 memset (&m->fs, 0, sizeof (m->fs));
10388 /* Initialize CFA state for before the prologue. */
10389 m->fs.cfa_reg = stack_pointer_rtx;
10390 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10392 /* Track SP offset to the CFA. We continue tracking this after we've
10393 swapped the CFA register away from SP. In the case of re-alignment
10394 this is fudged; we're interested in offsets within the local frame. */
10395 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10396 m->fs.sp_valid = true;
10398 ix86_compute_frame_layout (&frame);
10400 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10402 /* We should have already generated an error for any use of
10403 ms_hook on a nested function. */
10404 gcc_checking_assert (!ix86_static_chain_on_stack);
10406 /* Check if profiling is active and we should use the
10407 profiling-before-prologue variant. If so, sorry. */
10408 if (crtl->profile && flag_fentry != 0)
10409 sorry ("ms_hook_prologue attribute isn%'t compatible "
10410 "with -mfentry for 32-bit");
10412 /* In ix86_asm_output_function_label we emitted:
10413 8b ff movl.s %edi,%edi
10414 55 push %ebp
10415 8b ec movl.s %esp,%ebp
10417 This matches the hookable function prologue in Win32 API
10418 functions in Microsoft Windows XP Service Pack 2 and newer.
10419 Wine uses this to enable Windows apps to hook the Win32 API
10420 functions provided by Wine.
10422 What that means is that we've already set up the frame pointer. */
10424 if (frame_pointer_needed
10425 && !(crtl->drap_reg && crtl->stack_realign_needed))
10429 /* We've decided to use the frame pointer already set up.
10430 Describe this to the unwinder by pretending that both
10431 push and mov insns happen right here.
10433 Putting the unwind info here at the end of the ms_hook
10434 is done so that we can make absolutely certain we get
10435 the required byte sequence at the start of the function,
10436 rather than relying on an assembler that can produce
10437 the exact encoding required.
10439 However it does mean (in the unpatched case) that we have
10440 a 1 insn window where the asynchronous unwind info is
10441 incorrect. However, if we placed the unwind info at
10442 its correct location we would have incorrect unwind info
10443 in the patched case. Which is probably all moot since
10444 I don't expect Wine generates dwarf2 unwind info for the
10445 system libraries that use this feature. */
10447 insn = emit_insn (gen_blockage ());
10449 push = gen_push (hard_frame_pointer_rtx);
10450 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10451 stack_pointer_rtx);
10452 RTX_FRAME_RELATED_P (push) = 1;
10453 RTX_FRAME_RELATED_P (mov) = 1;
10455 RTX_FRAME_RELATED_P (insn) = 1;
10456 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10457 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10459 /* Note that gen_push incremented m->fs.cfa_offset, even
10460 though we didn't emit the push insn here. */
10461 m->fs.cfa_reg = hard_frame_pointer_rtx;
10462 m->fs.fp_offset = m->fs.cfa_offset;
10463 m->fs.fp_valid = true;
10467 /* The frame pointer is not needed so pop %ebp again.
10468 This leaves us with a pristine state. */
10469 emit_insn (gen_pop (hard_frame_pointer_rtx));
10473 /* The first insn of a function that accepts its static chain on the
10474 stack is to push the register that would be filled in by a direct
10475 call. This insn will be skipped by the trampoline. */
10476 else if (ix86_static_chain_on_stack)
10478 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10479 emit_insn (gen_blockage ());
10481 /* We don't want to interpret this push insn as a register save,
10482 only as a stack adjustment. The real copy of the register as
10483 a save will be done later, if needed. */
10484 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10485 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10486 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10487 RTX_FRAME_RELATED_P (insn) = 1;
10490 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10491 DRAP is needed and stack realignment is really needed after reload. */
10492 if (stack_realign_drap)
10494 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10496 /* Only need to push parameter pointer reg if it is caller saved. */
10497 if (!call_used_regs[REGNO (crtl->drap_reg)])
10499 /* Push arg pointer reg */
10500 insn = emit_insn (gen_push (crtl->drap_reg));
10501 RTX_FRAME_RELATED_P (insn) = 1;
10504 /* Grab the argument pointer. */
10505 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10506 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10507 RTX_FRAME_RELATED_P (insn) = 1;
10508 m->fs.cfa_reg = crtl->drap_reg;
10509 m->fs.cfa_offset = 0;
10511 /* Align the stack. */
10512 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10514 GEN_INT (-align_bytes)));
10515 RTX_FRAME_RELATED_P (insn) = 1;
10517 /* Replicate the return address on the stack so that return
10518 address can be reached via (argp - 1) slot. This is needed
10519 to implement macro RETURN_ADDR_RTX and intrinsic function
10520 expand_builtin_return_addr etc. */
10521 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10522 t = gen_frame_mem (word_mode, t);
10523 insn = emit_insn (gen_push (t));
10524 RTX_FRAME_RELATED_P (insn) = 1;
10526 /* For the purposes of frame and register save area addressing,
10527 we've started over with a new frame. */
10528 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10529 m->fs.realigned = true;
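/* Illustrative sketch (an addition; %ecx as the DRAP register and 16-byte
   alignment are assumptions): the block above emits roughly

       leal    4(%esp), %ecx      # grab the argument pointer
       andl    $-16, %esp         # align the stack
       pushl   -4(%ecx)           # replicate the return address

   so that the (argp - 1) slot still yields the return address after
   realignment.  */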
10532 int_registers_saved = (frame.nregs == 0);
10533 sse_registers_saved = (frame.nsseregs == 0);
10535 if (frame_pointer_needed && !m->fs.fp_valid)
10537 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10538 slower on all targets. Also sdb doesn't like it. */
10539 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10540 RTX_FRAME_RELATED_P (insn) = 1;
10542 /* Push registers now, before setting the frame pointer
10543 to frame pointer location. */
10544 if (!int_registers_saved
10546 && !frame.save_regs_using_mov)
10548 ix86_emit_save_regs ();
10549 int_registers_saved = true;
10550 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10553 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10555 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10556 RTX_FRAME_RELATED_P (insn) = 1;
10558 if (m->fs.cfa_reg == stack_pointer_rtx)
10559 m->fs.cfa_reg = hard_frame_pointer_rtx;
10560 m->fs.fp_offset = m->fs.sp_offset;
10561 m->fs.fp_valid = true;
10565 if (!int_registers_saved)
10567 /* If saving registers via PUSH, do so now. */
10568 if (!frame.save_regs_using_mov)
10570 ix86_emit_save_regs ();
10571 int_registers_saved = true;
10572 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10575 /* When using red zone we may start register saving before allocating
10576 the stack frame, saving one cycle of the prologue. However, avoid
10577 doing this if we have to probe the stack; at least on x86_64 the
10578 stack probe can turn into a call that clobbers a red zone location. */
10579 else if (ix86_using_red_zone ()
10580 && (! TARGET_STACK_PROBE
10581 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10583 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10584 int_registers_saved = true;
10588 if (stack_realign_fp)
10590 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10591 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10593 /* The computation of the size of the re-aligned stack frame means
10594 that we must allocate the size of the register save area before
10595 performing the actual alignment. Otherwise we cannot guarantee
10596 that there's enough storage above the realignment point. */
10597 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10598 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10599 GEN_INT (m->fs.sp_offset
10600 - frame.sse_reg_save_offset),
10603 /* Align the stack. */
10604 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10606 GEN_INT (-align_bytes)));
10608 /* For the purposes of register save area addressing, the stack
10609 pointer is no longer valid. As for the value of sp_offset,
10610 see ix86_compute_frame_layout, which we need to match in order
10611 to pass verification of stack_pointer_offset at the end. */
10612 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10613 m->fs.sp_valid = false;
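/* Worked example (added for illustration): the expression above always
   advances sp_offset to the next multiple of align_bytes, by at least one
   byte and at most align_bytes.  Assuming sp_offset = 44 and
   align_bytes = 16, (44 + 16) & -16 = 48; an already aligned
   sp_offset = 48 would become 64.  */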
10616 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10618 if (flag_stack_usage_info)
10620 /* We start to count from ARG_POINTER. */
10621 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10623 /* If it was realigned, take into account the fake frame. */
10624 if (stack_realign_drap)
10626 if (ix86_static_chain_on_stack)
10627 stack_size += UNITS_PER_WORD;
10629 if (!call_used_regs[REGNO (crtl->drap_reg)])
10630 stack_size += UNITS_PER_WORD;
10632 /* This over-estimates by 1 minimal-stack-alignment-unit but
10633 mitigates that by counting in the new return address slot. */
10634 current_function_dynamic_stack_size
10635 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10638 current_function_static_stack_size = stack_size;
10641 /* On SEH target with very large frame size, allocate an area to save
10642 SSE registers (as the very large allocation won't be described). */
10644 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10645 && !sse_registers_saved)
10647 HOST_WIDE_INT sse_size =
10648 frame.sse_reg_save_offset - frame.reg_save_offset;
10650 gcc_assert (int_registers_saved);
10652 /* No need to do stack checking as the area will be immediately
10653 written. */
10654 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10655 GEN_INT (-sse_size), -1,
10656 m->fs.cfa_reg == stack_pointer_rtx);
10657 allocate -= sse_size;
10658 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10659 sse_registers_saved = true;
10662 /* The stack has already been decremented by the instruction calling us,
10663 so probe if the size is non-negative to preserve the protection area. */
10664 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10666 /* We expect the registers to be saved when probes are used. */
10667 gcc_assert (int_registers_saved);
10669 if (STACK_CHECK_MOVING_SP)
10671 ix86_adjust_stack_and_probe (allocate);
10676 HOST_WIDE_INT size = allocate;
10678 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10679 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10681 if (TARGET_STACK_PROBE)
10682 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10684 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10690 else if (!ix86_target_stack_probe ()
10691 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10693 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10694 GEN_INT (-allocate), -1,
10695 m->fs.cfa_reg == stack_pointer_rtx);
10699 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10701 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10702 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
10703 bool eax_live = false;
10704 bool r10_live = false;
10707 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10708 if (!TARGET_64BIT_MS_ABI)
10709 eax_live = ix86_eax_live_at_start_p ();
10711 /* Note that SEH directives need to continue tracking the stack
10712 pointer even after the frame pointer has been set up. */
10715 insn = emit_insn (gen_push (eax));
10716 allocate -= UNITS_PER_WORD;
10717 if (sp_is_cfa_reg || TARGET_SEH)
10720 m->fs.cfa_offset += UNITS_PER_WORD;
10721 RTX_FRAME_RELATED_P (insn) = 1;
10727 r10 = gen_rtx_REG (Pmode, R10_REG);
10728 insn = emit_insn (gen_push (r10));
10729 allocate -= UNITS_PER_WORD;
10730 if (sp_is_cfa_reg || TARGET_SEH)
10733 m->fs.cfa_offset += UNITS_PER_WORD;
10734 RTX_FRAME_RELATED_P (insn) = 1;
10738 emit_move_insn (eax, GEN_INT (allocate));
10739 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10741 /* Use the fact that AX still contains ALLOCATE. */
10742 adjust_stack_insn = (Pmode == DImode
10743 ? gen_pro_epilogue_adjust_stack_di_sub
10744 : gen_pro_epilogue_adjust_stack_si_sub);
10746 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10747 stack_pointer_rtx, eax));
10749 if (sp_is_cfa_reg || TARGET_SEH)
10752 m->fs.cfa_offset += allocate;
10753 RTX_FRAME_RELATED_P (insn) = 1;
10754 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10755 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10756 plus_constant (Pmode, stack_pointer_rtx,
10759 m->fs.sp_offset += allocate;
10761 if (r10_live && eax_live)
10763 t = choose_baseaddr (m->fs.sp_offset - allocate);
10764 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10765 gen_frame_mem (word_mode, t));
10766 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10767 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
10768 gen_frame_mem (word_mode, t));
10770 else if (eax_live || r10_live)
10772 t = choose_baseaddr (m->fs.sp_offset - allocate);
10773 emit_move_insn (gen_rtx_REG (word_mode,
10774 (eax_live ? AX_REG : R10_REG)),
10775 gen_frame_mem (word_mode, t));
10778 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10780 /* If we haven't already set up the frame pointer, do so now. */
10781 if (frame_pointer_needed && !m->fs.fp_valid)
10783 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10784 GEN_INT (frame.stack_pointer_offset
10785 - frame.hard_frame_pointer_offset));
10786 insn = emit_insn (insn);
10787 RTX_FRAME_RELATED_P (insn) = 1;
10788 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10790 if (m->fs.cfa_reg == stack_pointer_rtx)
10791 m->fs.cfa_reg = hard_frame_pointer_rtx;
10792 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10793 m->fs.fp_valid = true;
10796 if (!int_registers_saved)
10797 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10798 if (!sse_registers_saved)
10799 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10801 pic_reg_used = false;
10802 if (pic_offset_table_rtx
10803 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10806 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10808 if (alt_pic_reg_used != INVALID_REGNUM)
10809 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10811 pic_reg_used = true;
10818 if (ix86_cmodel == CM_LARGE_PIC)
10820 rtx label, tmp_reg;
10822 gcc_assert (Pmode == DImode);
10823 label = gen_label_rtx ();
10824 emit_label (label);
10825 LABEL_PRESERVE_P (label) = 1;
10826 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
10827 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10828 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
10830 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10831 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
10832 pic_offset_table_rtx, tmp_reg));
10835 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10839 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10840 RTX_FRAME_RELATED_P (insn) = 1;
10841 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10845 /* In the pic_reg_used case, make sure that the got load isn't deleted
10846 when mcount needs it. Blockage to avoid call movement across mcount
10847 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10848 note. */
10849 if (crtl->profile && !flag_fentry && pic_reg_used)
10850 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10852 if (crtl->drap_reg && !crtl->stack_realign_needed)
10854 /* vDRAP is set up, but after reload it turns out stack realignment
10855 isn't necessary; here we emit the prologue to set up DRAP
10856 without the stack-realignment adjustment. */
10857 t = choose_baseaddr (0);
10858 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10861 /* Prevent instructions from being scheduled into register save push
10862 sequence when access to the redzone area is done through frame pointer.
10863 The offset between the frame pointer and the stack pointer is calculated
10864 relative to the value of the stack pointer at the end of the function
10865 prologue, and moving instructions that access redzone area via frame
10866 pointer inside push sequence violates this assumption. */
10867 if (frame_pointer_needed && frame.red_zone_size)
10868 emit_insn (gen_memory_blockage ());
10870 /* Emit cld instruction if stringops are used in the function. */
10871 if (TARGET_CLD && ix86_current_function_needs_cld)
10872 emit_insn (gen_cld ());
10874 /* SEH requires that the prologue end within 256 bytes of the start of
10875 the function. Prevent instruction schedules that would extend that.
10876 Further, prevent alloca modifications to the stack pointer from being
10877 combined with prologue modifications. */
10879 emit_insn (gen_prologue_use (stack_pointer_rtx));
10882 /* Emit code to restore REG using a POP insn. */
10885 ix86_emit_restore_reg_using_pop (rtx reg)
10887 struct machine_function *m = cfun->machine;
10888 rtx insn = emit_insn (gen_pop (reg));
10890 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10891 m->fs.sp_offset -= UNITS_PER_WORD;
10893 if (m->fs.cfa_reg == crtl->drap_reg
10894 && REGNO (reg) == REGNO (crtl->drap_reg))
10896 /* Previously we'd represented the CFA as an expression
10897 like *(%ebp - 8). We've just popped that value from
10898 the stack, which means we need to reset the CFA to
10899 the drap register. This will remain until we restore
10900 the stack pointer. */
10901 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10902 RTX_FRAME_RELATED_P (insn) = 1;
10904 /* This means that the DRAP register is valid for addressing too. */
10905 m->fs.drap_valid = true;
10909 if (m->fs.cfa_reg == stack_pointer_rtx)
10911 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10912 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10913 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10914 RTX_FRAME_RELATED_P (insn) = 1;
10916 m->fs.cfa_offset -= UNITS_PER_WORD;
10919 /* When the frame pointer is the CFA, and we pop it, we are
10920 swapping back to the stack pointer as the CFA. This happens
10921 for stack frames that don't allocate other data, so we assume
10922 the stack pointer is now pointing at the return address, i.e.
10923 the function entry state, which makes the offset be 1 word. */
10924 if (reg == hard_frame_pointer_rtx)
10926 m->fs.fp_valid = false;
10927 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10929 m->fs.cfa_reg = stack_pointer_rtx;
10930 m->fs.cfa_offset -= UNITS_PER_WORD;
10932 add_reg_note (insn, REG_CFA_DEF_CFA,
10933 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10934 GEN_INT (m->fs.cfa_offset)));
10935 RTX_FRAME_RELATED_P (insn) = 1;
10940 /* Emit code to restore saved registers using POP insns. */
10943 ix86_emit_restore_regs_using_pop (void)
10945 unsigned int regno;
10947 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10948 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10949 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
10952 /* Emit code and notes for the LEAVE instruction. */
10955 ix86_emit_leave (void)
10957 struct machine_function *m = cfun->machine;
10958 rtx insn = emit_insn (ix86_gen_leave ());
10960 ix86_add_queued_cfa_restore_notes (insn);
10962 gcc_assert (m->fs.fp_valid);
10963 m->fs.sp_valid = true;
10964 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10965 m->fs.fp_valid = false;
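/* For reference: leave is architecturally equivalent to
   "mov %rbp, %rsp; pop %rbp" (mov %ebp, %esp / pop %ebp in 32-bit mode),
   which is why sp_offset becomes fp_offset - UNITS_PER_WORD here and the
   frame pointer stops being a valid base.  */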
10967 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10969 m->fs.cfa_reg = stack_pointer_rtx;
10970 m->fs.cfa_offset = m->fs.sp_offset;
10972 add_reg_note (insn, REG_CFA_DEF_CFA,
10973 plus_constant (Pmode, stack_pointer_rtx,
10975 RTX_FRAME_RELATED_P (insn) = 1;
10977 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10981 /* Emit code to restore saved registers using MOV insns.
10982 First register is restored from CFA - CFA_OFFSET. */
10984 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10985 bool maybe_eh_return)
10987 struct machine_function *m = cfun->machine;
10988 unsigned int regno;
10990 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10991 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10993 rtx reg = gen_rtx_REG (word_mode, regno);
10996 mem = choose_baseaddr (cfa_offset);
10997 mem = gen_frame_mem (word_mode, mem);
10998 insn = emit_move_insn (reg, mem);
11000 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11002 /* Previously we'd represented the CFA as an expression
11003 like *(%ebp - 8). We've just popped that value from
11004 the stack, which means we need to reset the CFA to
11005 the drap register. This will remain until we restore
11006 the stack pointer. */
11007 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11008 RTX_FRAME_RELATED_P (insn) = 1;
11010 /* This means that the DRAP register is valid for addressing. */
11011 m->fs.drap_valid = true;
11014 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11016 cfa_offset -= UNITS_PER_WORD;
11020 /* Emit code to restore saved registers using MOV insns.
11021 First register is restored from CFA - CFA_OFFSET. */
11023 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11024 bool maybe_eh_return)
11026 unsigned int regno;
11028 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11029 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11031 rtx reg = gen_rtx_REG (V4SFmode, regno);
11034 mem = choose_baseaddr (cfa_offset);
11035 mem = gen_rtx_MEM (V4SFmode, mem);
11036 set_mem_align (mem, 128);
11037 emit_move_insn (reg, mem);
11039 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11045 /* Emit vzeroupper if needed. */
11048 ix86_maybe_emit_epilogue_vzeroupper (void)
11050 if (TARGET_VZEROUPPER
11051 && !TREE_THIS_VOLATILE (cfun->decl)
11052 && !cfun->machine->caller_return_avx256_p)
11053 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11056 /* Restore function stack, frame, and registers. */
11059 ix86_expand_epilogue (int style)
11061 struct machine_function *m = cfun->machine;
11062 struct machine_frame_state frame_state_save = m->fs;
11063 struct ix86_frame frame;
11064 bool restore_regs_via_mov;
11067 ix86_finalize_stack_realign_flags ();
11068 ix86_compute_frame_layout (&frame);
11070 m->fs.sp_valid = (!frame_pointer_needed
11071 || (crtl->sp_is_unchanging
11072 && !stack_realign_fp));
11073 gcc_assert (!m->fs.sp_valid
11074 || m->fs.sp_offset == frame.stack_pointer_offset);
11076 /* The FP must be valid if the frame pointer is present. */
11077 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11078 gcc_assert (!m->fs.fp_valid
11079 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11081 /* We must have *some* valid pointer to the stack frame. */
11082 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11084 /* The DRAP is never valid at this point. */
11085 gcc_assert (!m->fs.drap_valid);
11087 /* See the comment about red zone and frame
11088 pointer usage in ix86_expand_prologue. */
11089 if (frame_pointer_needed && frame.red_zone_size)
11090 emit_insn (gen_memory_blockage ());
11092 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11093 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11095 /* Determine the CFA offset of the end of the red-zone. */
11096 m->fs.red_zone_offset = 0;
11097 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11099 /* The red-zone begins below the return address. */
11100 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11102 /* When the register save area is in the aligned portion of
11103 the stack, determine the maximum runtime displacement that
11104 matches up with the aligned frame. */
11105 if (stack_realign_drap)
11106 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11110 /* Special care must be taken for the normal return case of a function
11111 using eh_return: the eax and edx registers are marked as saved, but
11112 not restored along this path. Adjust the save location to match. */
11113 if (crtl->calls_eh_return && style != 2)
11114 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11116 /* EH_RETURN requires the use of moves to function properly. */
11117 if (crtl->calls_eh_return)
11118 restore_regs_via_mov = true;
11119 /* SEH requires the use of pops to identify the epilogue. */
11120 else if (TARGET_SEH)
11121 restore_regs_via_mov = false;
11122 /* If we're only restoring one register and sp is not valid, then
11123 use a move instruction to restore the register, since it's
11124 less work than reloading sp and popping the register. */
11125 else if (!m->fs.sp_valid && frame.nregs <= 1)
11126 restore_regs_via_mov = true;
11127 else if (TARGET_EPILOGUE_USING_MOVE
11128 && cfun->machine->use_fast_prologue_epilogue
11129 && (frame.nregs > 1
11130 || m->fs.sp_offset != frame.reg_save_offset))
11131 restore_regs_via_mov = true;
11132 else if (frame_pointer_needed
11134 && m->fs.sp_offset != frame.reg_save_offset)
11135 restore_regs_via_mov = true;
11136 else if (frame_pointer_needed
11137 && TARGET_USE_LEAVE
11138 && cfun->machine->use_fast_prologue_epilogue
11139 && frame.nregs == 1)
11140 restore_regs_via_mov = true;
11142 restore_regs_via_mov = false;
11144 if (restore_regs_via_mov || frame.nsseregs)
11146 /* Ensure that the entire register save area is addressable via
11147 the stack pointer, if we will restore via sp. */
11149 && m->fs.sp_offset > 0x7fffffff
11150 && !(m->fs.fp_valid || m->fs.drap_valid)
11151 && (frame.nsseregs + frame.nregs) != 0)
11153 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11154 GEN_INT (m->fs.sp_offset
11155 - frame.sse_reg_save_offset),
11157 m->fs.cfa_reg == stack_pointer_rtx);
11161 /* If there are any SSE registers to restore, then we have to do it
11162 via moves, since there's obviously no pop for SSE regs. */
11163 if (frame.nsseregs)
11164 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11167 if (restore_regs_via_mov)
11172 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11174 /* eh_return epilogues need %ecx added to the stack pointer. */
11177 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11179 /* Stack align doesn't work with eh_return. */
11180 gcc_assert (!stack_realign_drap);
11181 /* Neither does regparm nested functions. */
11182 gcc_assert (!ix86_static_chain_on_stack);
11184 if (frame_pointer_needed)
11186 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11187 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11188 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11190 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11191 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11193 /* Note that we use SA as a temporary CFA, as the return
11194 address is at the proper place relative to it. We
11195 pretend this happens at the FP restore insn because
11196 prior to this insn the FP would be stored at the wrong
11197 offset relative to SA, and after this insn we have no
11198 other reasonable register to use for the CFA. We don't
11199 bother resetting the CFA to the SP for the duration of
11200 the return insn. */
11201 add_reg_note (insn, REG_CFA_DEF_CFA,
11202 plus_constant (Pmode, sa, UNITS_PER_WORD));
11203 ix86_add_queued_cfa_restore_notes (insn);
11204 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11205 RTX_FRAME_RELATED_P (insn) = 1;
11207 m->fs.cfa_reg = sa;
11208 m->fs.cfa_offset = UNITS_PER_WORD;
11209 m->fs.fp_valid = false;
11211 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11212 const0_rtx, style, false);
11216 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11217 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11218 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11219 ix86_add_queued_cfa_restore_notes (insn);
11221 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11222 if (m->fs.cfa_offset != UNITS_PER_WORD)
11224 m->fs.cfa_offset = UNITS_PER_WORD;
11225 add_reg_note (insn, REG_CFA_DEF_CFA,
11226 plus_constant (Pmode, stack_pointer_rtx,
11228 RTX_FRAME_RELATED_P (insn) = 1;
11231 m->fs.sp_offset = UNITS_PER_WORD;
11232 m->fs.sp_valid = true;
11237 /* SEH requires that the function end with (1) a stack adjustment
11238 if necessary, (2) a sequence of pops, and (3) a return or
11239 jump instruction. Prevent insns from the function body from
11240 being scheduled into this sequence. */
11243 /* Prevent a catch region from being adjacent to the standard
11244 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11245 several other flags that would be interesting to test are
11246 set up yet. */
11247 if (flag_non_call_exceptions)
11248 emit_insn (gen_nops (const1_rtx));
11250 emit_insn (gen_blockage ());
11253 /* First step is to deallocate the stack frame so that we can
11254 pop the registers. Also do it on SEH target for very large
11255 frame as the emitted instructions aren't allowed by the ABI in
11256 epilogues. */
11257 if (!m->fs.sp_valid
11259 && (m->fs.sp_offset - frame.reg_save_offset
11260 >= SEH_MAX_FRAME_SIZE)))
11262 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11263 GEN_INT (m->fs.fp_offset
11264 - frame.reg_save_offset),
11267 else if (m->fs.sp_offset != frame.reg_save_offset)
11269 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11270 GEN_INT (m->fs.sp_offset
11271 - frame.reg_save_offset),
11273 m->fs.cfa_reg == stack_pointer_rtx);
11276 ix86_emit_restore_regs_using_pop ();
11279 /* If we used a frame pointer and haven't already got rid of it,
11280 then do so now. */
11281 if (m->fs.fp_valid)
11283 /* If the stack pointer is valid and pointing at the frame
11284 pointer store address, then we only need a pop. */
11285 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11286 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11287 /* Leave results in shorter dependency chains on CPUs that are
11288 able to grok it fast. */
11289 else if (TARGET_USE_LEAVE
11290 || optimize_function_for_size_p (cfun)
11291 || !cfun->machine->use_fast_prologue_epilogue)
11292 ix86_emit_leave ();
11295 pro_epilogue_adjust_stack (stack_pointer_rtx,
11296 hard_frame_pointer_rtx,
11297 const0_rtx, style, !using_drap);
11298 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11304 int param_ptr_offset = UNITS_PER_WORD;
11307 gcc_assert (stack_realign_drap);
11309 if (ix86_static_chain_on_stack)
11310 param_ptr_offset += UNITS_PER_WORD;
11311 if (!call_used_regs[REGNO (crtl->drap_reg)])
11312 param_ptr_offset += UNITS_PER_WORD;
11314 insn = emit_insn (gen_rtx_SET
11315 (VOIDmode, stack_pointer_rtx,
11316 gen_rtx_PLUS (Pmode,
11318 GEN_INT (-param_ptr_offset))));
11319 m->fs.cfa_reg = stack_pointer_rtx;
11320 m->fs.cfa_offset = param_ptr_offset;
11321 m->fs.sp_offset = param_ptr_offset;
11322 m->fs.realigned = false;
11324 add_reg_note (insn, REG_CFA_DEF_CFA,
11325 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11326 GEN_INT (param_ptr_offset)));
11327 RTX_FRAME_RELATED_P (insn) = 1;
11329 if (!call_used_regs[REGNO (crtl->drap_reg)])
11330 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11333 /* At this point the stack pointer must be valid, and we must have
11334 restored all of the registers. We may not have deallocated the
11335 entire stack frame. We've delayed this until now because it may
11336 be possible to merge the local stack deallocation with the
11337 deallocation forced by ix86_static_chain_on_stack. */
11338 gcc_assert (m->fs.sp_valid);
11339 gcc_assert (!m->fs.fp_valid);
11340 gcc_assert (!m->fs.realigned);
11341 if (m->fs.sp_offset != UNITS_PER_WORD)
11343 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11344 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11348 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11350 /* Sibcall epilogues don't want a return instruction. */
11353 m->fs = frame_state_save;
11357 /* Emit vzeroupper if needed. */
11358 ix86_maybe_emit_epilogue_vzeroupper ();
11360 if (crtl->args.pops_args && crtl->args.size)
11362 rtx popc = GEN_INT (crtl->args.pops_args);
11364 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11365 address, do explicit add, and jump indirectly to the caller. */
11367 if (crtl->args.pops_args >= 65536)
11369 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11372 /* There is no "pascal" calling convention in any 64bit ABI. */
11373 gcc_assert (!TARGET_64BIT);
11375 insn = emit_insn (gen_pop (ecx));
11376 m->fs.cfa_offset -= UNITS_PER_WORD;
11377 m->fs.sp_offset -= UNITS_PER_WORD;
11379 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11380 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11381 add_reg_note (insn, REG_CFA_REGISTER,
11382 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11383 RTX_FRAME_RELATED_P (insn) = 1;
11385 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11387 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
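/* Illustrative sketch (an addition, 32-bit AT&T syntax, N standing for
   crtl->args.pops_args): the >= 64K branch above emits roughly

       popl    %ecx          # pop the return address
       addl    $N, %esp      # pop the arguments explicitly
       jmp     *%ecx         # return to the caller indirectly  */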
11390 emit_jump_insn (gen_simple_return_pop_internal (popc));
11393 emit_jump_insn (gen_simple_return_internal ());
11395 /* Restore the state back to the state from the prologue,
11396 so that it's correct for the next epilogue. */
11397 m->fs = frame_state_save;
11400 /* Reset from the function's potential modifications. */
11403 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11404 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11406 if (pic_offset_table_rtx)
11407 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11409 /* Mach-O doesn't support labels at the end of objects, so if
11410 it looks like we might want one, insert a NOP. */
11412 rtx insn = get_last_insn ();
11413 rtx deleted_debug_label = NULL_RTX;
11416 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11418 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11419 notes only, instead set their CODE_LABEL_NUMBER to -1,
11420 otherwise there would be code generation differences
11421 between -g and -g0. */
11422 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11423 deleted_debug_label = insn;
11424 insn = PREV_INSN (insn);
11429 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11430 fputs ("\tnop\n", file);
11431 else if (deleted_debug_label)
11432 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11433 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11434 CODE_LABEL_NUMBER (insn) = -1;
11440 /* Return a scratch register to use in the split stack prologue. The
11441 split stack prologue is used for -fsplit-stack. These are the first
11442 instructions in the function, even before the regular prologue.
11443 The scratch register can be any caller-saved register which is not
11444 used for parameters or for the static chain. */
11446 static unsigned int
11447 split_stack_prologue_scratch_regno (void)
11456 is_fastcall = (lookup_attribute ("fastcall",
11457 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11459 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11463 if (DECL_STATIC_CHAIN (cfun->decl))
11465 sorry ("-fsplit-stack does not support fastcall with "
11466 "nested function");
11467 return INVALID_REGNUM;
11471 else if (regparm < 3)
11473 if (!DECL_STATIC_CHAIN (cfun->decl))
11479 sorry ("-fsplit-stack does not support 2 register "
11480 " parameters for a nested function");
11481 return INVALID_REGNUM;
11488 /* FIXME: We could make this work by pushing a register
11489 around the addition and comparison. */
11490 sorry ("-fsplit-stack does not support 3 register parameters");
11491 return INVALID_REGNUM;
11496 /* A SYMBOL_REF for the function which allocates new stack space for
11497 split stack. */
11499 static GTY(()) rtx split_stack_fn;
11501 /* A SYMBOL_REF for the more stack function when using the large
11502 model. */
11504 static GTY(()) rtx split_stack_fn_large;
11506 /* Handle -fsplit-stack. These are the first instructions in the
11507 function, even before the regular prologue. */
11510 ix86_expand_split_stack_prologue (void)
11512 struct ix86_frame frame;
11513 HOST_WIDE_INT allocate;
11514 unsigned HOST_WIDE_INT args_size;
11515 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11516 rtx scratch_reg = NULL_RTX;
11517 rtx varargs_label = NULL_RTX;
11520 gcc_assert (flag_split_stack && reload_completed);
11522 ix86_finalize_stack_realign_flags ();
11523 ix86_compute_frame_layout (&frame);
11524 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11526 /* This is the label we will branch to if we have enough stack
11527 space. We expect the basic block reordering pass to reverse this
11528 branch if optimizing, so that we branch in the unlikely case. */
11529 label = gen_label_rtx ();
11531 /* We need to compare the stack pointer minus the frame size with
11532 the stack boundary in the TCB. The stack boundary always gives
11533 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11534 can compare directly. Otherwise we need to do an addition. */
11536 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11537 UNSPEC_STACK_CHECK);
11538 limit = gen_rtx_CONST (Pmode, limit);
11539 limit = gen_rtx_MEM (Pmode, limit);
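/* For illustration (an assumption about the runtime layout, not something
   defined in this file): on x86_64 GNU/Linux the UNSPEC_STACK_CHECK memory
   reference ends up as a segment-relative access such as %fs:0x70, the
   split-stack boundary slot that libgcc's __morestack maintains in the
   thread control block.  */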
11540 if (allocate < SPLIT_STACK_AVAILABLE)
11541 current = stack_pointer_rtx;
11544 unsigned int scratch_regno;
11547 /* We need a scratch register to hold the stack pointer minus
11548 the required frame size. Since this is the very start of the
11549 function, the scratch register can be any caller-saved
11550 register which is not used for parameters. */
11551 offset = GEN_INT (- allocate);
11552 scratch_regno = split_stack_prologue_scratch_regno ();
11553 if (scratch_regno == INVALID_REGNUM)
11555 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11556 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11558 /* We don't use ix86_gen_add3 in this case because it will
11559 want to split to lea, but when not optimizing the insn
11560 will not be split after this point. */
11561 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11562 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11567 emit_move_insn (scratch_reg, offset);
11568 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11569 stack_pointer_rtx));
11571 current = scratch_reg;
11574 ix86_expand_branch (GEU, current, limit, label);
11575 jump_insn = get_last_insn ();
11576 JUMP_LABEL (jump_insn) = label;
11578 /* Mark the jump as very likely to be taken. */
11579 add_reg_note (jump_insn, REG_BR_PROB,
11580 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11582 if (split_stack_fn == NULL_RTX)
11583 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11584 fn = split_stack_fn;
11586 /* Get more stack space. We pass in the desired stack space and the
11587 size of the arguments to copy to the new stack. In 32-bit mode
11588 we push the parameters; __morestack will return on a new stack
11589 anyhow. In 64-bit mode we pass the parameters in r10 and
11590 r11. */
11591 allocate_rtx = GEN_INT (allocate);
11592 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11593 call_fusage = NULL_RTX;
11598 reg10 = gen_rtx_REG (Pmode, R10_REG);
11599 reg11 = gen_rtx_REG (Pmode, R11_REG);
11601 /* If this function uses a static chain, it will be in %r10.
11602 Preserve it across the call to __morestack. */
11603 if (DECL_STATIC_CHAIN (cfun->decl))
11607 rax = gen_rtx_REG (word_mode, AX_REG);
11608 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11609 use_reg (&call_fusage, rax);
11612 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11614 HOST_WIDE_INT argval;
11616 gcc_assert (Pmode == DImode);
11617 /* When using the large model we need to load the address
11618 into a register, and we've run out of registers. So we
11619 switch to a different calling convention, and we call a
11620 different function: __morestack_large. We pass the
11621 argument size in the upper 32 bits of r10 and pass the
11622 frame size in the lower 32 bits. */
11623 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11624 gcc_assert ((args_size & 0xffffffff) == args_size);
11626 if (split_stack_fn_large == NULL_RTX)
11627 split_stack_fn_large =
11628 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11630 if (ix86_cmodel == CM_LARGE_PIC)
11634 label = gen_label_rtx ();
11635 emit_label (label);
11636 LABEL_PRESERVE_P (label) = 1;
11637 emit_insn (gen_set_rip_rex64 (reg10, label));
11638 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11639 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11640 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11642 x = gen_rtx_CONST (Pmode, x);
11643 emit_move_insn (reg11, x);
11644 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11645 x = gen_const_mem (Pmode, x);
11646 emit_move_insn (reg11, x);
11649 emit_move_insn (reg11, split_stack_fn_large);
11653 argval = ((args_size << 16) << 16) + allocate;
11654 emit_move_insn (reg10, GEN_INT (argval));
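/* Worked example (illustrative values): the double 16-bit shift places
   args_size in the upper half without a single shift by 32, which would
   be undefined if HOST_WIDE_INT were only 32 bits wide.  Assuming
   args_size = 0x18 and allocate = 0x2000,
   argval = 0x0000001800002000.  */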
11658 emit_move_insn (reg10, allocate_rtx);
11659 emit_move_insn (reg11, GEN_INT (args_size));
11660 use_reg (&call_fusage, reg11);
11663 use_reg (&call_fusage, reg10);
11667 emit_insn (gen_push (GEN_INT (args_size)));
11668 emit_insn (gen_push (allocate_rtx));
11670 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11671 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11673 add_function_usage_to (call_insn, call_fusage);
11675 /* In order to make call/return prediction work right, we now need
11676 to execute a return instruction. See
11677 libgcc/config/i386/morestack.S for the details on how this works.
11679 For flow purposes gcc must not see this as a return
11680 instruction--we need control flow to continue at the subsequent
11681 label. Therefore, we use an unspec. */
11682 gcc_assert (crtl->args.pops_args < 65536);
11683 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11685 /* If we are in 64-bit mode and this function uses a static chain,
11686 we saved %r10 in %rax before calling __morestack. */
11687 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11688 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11689 gen_rtx_REG (word_mode, AX_REG));
11691 /* If this function calls va_start, we need to store a pointer to
11692 the arguments on the old stack, because they may not have been
11693 all copied to the new stack. At this point the old stack can be
11694 found at the frame pointer value used by __morestack, because
11695 __morestack has set that up before calling back to us. Here we
11696 store that pointer in a scratch register, and in
11697 ix86_expand_prologue we store the scratch register in a stack
11698 slot. */
11699 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11701 unsigned int scratch_regno;
11705 scratch_regno = split_stack_prologue_scratch_regno ();
11706 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11707 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11709 /* 64-bit:
11710 fp -> old fp value
11711 return address within this function
11712 return address of caller of this function
11713 stack arguments
11714 So we add three words to get to the stack arguments.
11716 32-bit:
11717 fp -> old fp value
11718 return address within this function
11719 first argument to __morestack
11720 second argument to __morestack
11721 return address of caller of this function
11722 stack arguments
11723 So we add five words to get to the stack arguments. */
11725 words = TARGET_64BIT ? 3 : 5;
11726 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11727 gen_rtx_PLUS (Pmode, frame_reg,
11728 GEN_INT (words * UNITS_PER_WORD))));
11730 varargs_label = gen_label_rtx ();
11731 emit_jump_insn (gen_jump (varargs_label));
11732 JUMP_LABEL (get_last_insn ()) = varargs_label;
11737 emit_label (label);
11738 LABEL_NUSES (label) = 1;
11740 /* If this function calls va_start, we now have to set the scratch
11741 register for the case where we do not call __morestack. In this
11742 case we need to set it based on the stack pointer. */
11743 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11745 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11746 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11747 GEN_INT (UNITS_PER_WORD))));
11749 emit_label (varargs_label);
11750 LABEL_NUSES (varargs_label) = 1;
11754 /* We may have to tell the dataflow pass that the split stack prologue
11755 is initializing a scratch register. */
11758 ix86_live_on_entry (bitmap regs)
11760 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11762 gcc_assert (flag_split_stack);
11763 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11767 /* Determine if op is a suitable SUBREG RTX for an address. */
11770 ix86_address_subreg_operand (rtx op)
11772 enum machine_mode mode;
11777 mode = GET_MODE (op);
11779 if (GET_MODE_CLASS (mode) != MODE_INT)
11782 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11783 failures when the register is one word out of a two word structure. */
11784 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11787 /* simplify_subreg does not handle stack pointer. */
11788 if (REGNO (op) == STACK_POINTER_REGNUM)
11791 /* Allow only SUBREGs of non-eliminable hard registers. */
11792 return register_no_elim_operand (op, mode);
11795 /* Extract the parts of an RTL expression that is a valid memory address
11796 for an instruction. Return 0 if the structure of the address is
11797 grossly off. Return -1 if the address contains ASHIFT, so it is not
11798 strictly valid, but still used for computing the length of an lea instruction. */
11801 ix86_decompose_address (rtx addr, struct ix86_address *out)
11803 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11804 rtx base_reg, index_reg;
11805 HOST_WIDE_INT scale = 1;
11806 rtx scale_rtx = NULL_RTX;
11809 enum ix86_address_seg seg = SEG_DEFAULT;
11811 /* Allow zero-extended SImode addresses;
11812 they will be emitted with addr32 prefix. */
11813 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11815 if (GET_CODE (addr) == ZERO_EXTEND
11816 && GET_MODE (XEXP (addr, 0)) == SImode)
11818 addr = XEXP (addr, 0);
11819 if (CONST_INT_P (addr))
11822 else if (GET_CODE (addr) == AND
11823 && const_32bit_mask (XEXP (addr, 1), DImode))
11825 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
11826 if (addr == NULL_RTX)
11829 if (CONST_INT_P (addr))
11834 /* Allow SImode subregs of DImode addresses;
11835 they will be emitted with addr32 prefix. */
11836 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11838 if (GET_CODE (addr) == SUBREG
11839 && GET_MODE (SUBREG_REG (addr)) == DImode)
11841 addr = SUBREG_REG (addr);
11842 if (CONST_INT_P (addr))
11849 else if (GET_CODE (addr) == SUBREG)
11851 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11856 else if (GET_CODE (addr) == PLUS)
11858 rtx addends[4], op;
11866 addends[n++] = XEXP (op, 1);
11869 while (GET_CODE (op) == PLUS);
11874 for (i = n; i >= 0; --i)
11877 switch (GET_CODE (op))
11882 index = XEXP (op, 0);
11883 scale_rtx = XEXP (op, 1);
11889 index = XEXP (op, 0);
11890 tmp = XEXP (op, 1);
11891 if (!CONST_INT_P (tmp))
11893 scale = INTVAL (tmp);
11894 if ((unsigned HOST_WIDE_INT) scale > 3)
11896 scale = 1 << scale;
11901 if (GET_CODE (op) != UNSPEC)
11906 if (XINT (op, 1) == UNSPEC_TP
11907 && TARGET_TLS_DIRECT_SEG_REFS
11908 && seg == SEG_DEFAULT)
11909 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11915 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11942 else if (GET_CODE (addr) == MULT)
11944 index = XEXP (addr, 0); /* index*scale */
11945 scale_rtx = XEXP (addr, 1);
11947 else if (GET_CODE (addr) == ASHIFT)
11949 /* We're called for lea too, which implements ashift on occasion. */
11950 index = XEXP (addr, 0);
11951 tmp = XEXP (addr, 1);
11952 if (!CONST_INT_P (tmp))
11954 scale = INTVAL (tmp);
11955 if ((unsigned HOST_WIDE_INT) scale > 3)
11957 scale = 1 << scale;
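/* Illustrative example (an addition): (ashift (reg) (const_int 3)) means
   reg << 3, i.e. reg * 8, so shift counts 0..3 map onto the hardware
   scale factors 1, 2, 4 and 8, as in "lea 0(,%eax,8), %edx".  */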
11960 else if (CONST_INT_P (addr))
11962 if (!x86_64_immediate_operand (addr, VOIDmode))
11965 /* Constant addresses are sign extended to 64bit; we have to
11966 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11968 && val_signbit_known_set_p (SImode, INTVAL (addr)))
11974 disp = addr; /* displacement */
11980 else if (GET_CODE (index) == SUBREG
11981 && ix86_address_subreg_operand (SUBREG_REG (index)))
11987 /* Address override works only on the (%reg) part of %fs:(%reg). */
11988 if (seg != SEG_DEFAULT
11989 && ((base && GET_MODE (base) != word_mode)
11990 || (index && GET_MODE (index) != word_mode)))
11993 /* Extract the integral value of scale. */
11996 if (!CONST_INT_P (scale_rtx))
11998 scale = INTVAL (scale_rtx);
12001 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12002 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12004 /* Avoid useless 0 displacement. */
12005 if (disp == const0_rtx && (base || index))
12008 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12009 if (base_reg && index_reg && scale == 1
12010 && (index_reg == arg_pointer_rtx
12011 || index_reg == frame_pointer_rtx
12012 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12015 tmp = base, base = index, index = tmp;
12016 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12019 /* Special case: %ebp cannot be encoded as a base without a displacement.
12020 Similarly %r13. */
12021 if (disp == NULL_RTX
12022 && base_reg
12023 && (base_reg == hard_frame_pointer_rtx
12024 || base_reg == frame_pointer_rtx
12025 || base_reg == arg_pointer_rtx
12026 || (REG_P (base_reg)
12027 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12028 || REGNO (base_reg) == R13_REG))))
12029 disp = const0_rtx;
12031 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12032 Avoid this by transforming to [%esi+0].
12033 Reload calls address legitimization without cfun defined, so we need
12034 to test cfun for being non-NULL. */
12035 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12036 && base_reg && !index_reg && !disp
12037 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12040 /* Special case: encode reg+reg instead of reg*2. */
12041 if (!base && index && scale == 2)
12042 base = index, base_reg = index_reg, scale = 1;
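/* For example, (%eax,%eax) is preferred over (,%eax,2): a scaled index
   with no base register always requires a 32-bit displacement field in
   the encoding, so reusing the index as the base yields a shorter insn.
   (Illustrative; exact sizes depend on the rest of the instruction.)  */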
12044 /* Special case: scaling cannot be encoded without base or displacement. */
12045 if (!base && !disp && index && scale != 1)
12049 out->index = index;
12051 out->scale = scale;
12057 /* Return cost of the memory address x.
12058 For i386, it is better to use a complex address than let gcc copy
12059 the address into a reg and make a new pseudo. But not if the address
12060 requires two regs - that would mean more pseudos with longer
12061 lifetimes. */
12063 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12064 addr_space_t as ATTRIBUTE_UNUSED,
12065 bool speed ATTRIBUTE_UNUSED)
12067 struct ix86_address parts;
12069 int ok = ix86_decompose_address (x, &parts);
12073 if (parts.base && GET_CODE (parts.base) == SUBREG)
12074 parts.base = SUBREG_REG (parts.base);
12075 if (parts.index && GET_CODE (parts.index) == SUBREG)
12076 parts.index = SUBREG_REG (parts.index);
12078 /* Attempt to minimize number of registers in the address. */
12080 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12082 && (!REG_P (parts.index)
12083 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12087 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12089 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12090 && parts.base != parts.index)
12093 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12094 since its predecode logic can't detect the length of instructions
12095 and it degenerates to vector decoding. Increase the cost of such
12096 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12097 to split such addresses or even refuse such addresses at all.
12099 The following addressing modes are affected:
12100 [base+scale*index]
12101 [scale*index+disp]
12102 [base+index]
12104 The first and last case may be avoidable by explicitly coding the zero
12105 in the memory address, but I don't have an AMD-K6 machine handy to check
12106 this theory. */
12109 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12110 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12111 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
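/* A hedged sketch of the workaround hinted at above: the affected
   [base+index] form can be steered into the cheaper encoding by
   coding an explicit zero displacement, e.g.

     movl (%ebx,%ecx), %eax     # ModR/M 00_xxx_100b, vector decoded
     movl 0(%ebx,%ecx), %eax    # disp8 form, avoids the K6 penalty

   at the cost of one extra displacement byte.  */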
12117 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12118 this is used to form addresses to local data when -fPIC is in
12119 effect. */
12122 darwin_local_data_pic (rtx disp)
12124 return (GET_CODE (disp) == UNSPEC
12125 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12128 /* Determine if a given RTX is a valid constant. We already know this
12129 satisfies CONSTANT_P. */
12132 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12134 switch (GET_CODE (x))
12139 if (GET_CODE (x) == PLUS)
12141 if (!CONST_INT_P (XEXP (x, 1)))
12146 if (TARGET_MACHO && darwin_local_data_pic (x))
12149 /* Only some unspecs are valid as "constants". */
12150 if (GET_CODE (x) == UNSPEC)
12151 switch (XINT (x, 1))
12154 case UNSPEC_GOTOFF:
12155 case UNSPEC_PLTOFF:
12156 return TARGET_64BIT;
12158 case UNSPEC_NTPOFF:
12159 x = XVECEXP (x, 0, 0);
12160 return (GET_CODE (x) == SYMBOL_REF
12161 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12162 case UNSPEC_DTPOFF:
12163 x = XVECEXP (x, 0, 0);
12164 return (GET_CODE (x) == SYMBOL_REF
12165 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12170 /* We must have drilled down to a symbol. */
12171 if (GET_CODE (x) == LABEL_REF)
12173 if (GET_CODE (x) != SYMBOL_REF)
12178 /* TLS symbols are never valid. */
12179 if (SYMBOL_REF_TLS_MODEL (x))
12182 /* DLLIMPORT symbols are never valid. */
12183 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12184 && SYMBOL_REF_DLLIMPORT_P (x))
12188 /* mdynamic-no-pic */
12189 if (MACHO_DYNAMIC_NO_PIC_P)
12190 return machopic_symbol_defined_p (x);
12195 if (GET_MODE (x) == TImode
12196 && x != CONST0_RTX (TImode)
12202 if (!standard_sse_constant_p (x))
12209 /* Otherwise we handle everything else in the move patterns. */
12213 /* Determine if it's legal to put X into the constant pool. This
12214 is not possible for the address of thread-local symbols, which
12215 is checked above. */
12218 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12220 /* We can always put integral constants and vectors in memory. */
12221 switch (GET_CODE (x))
12231 return !ix86_legitimate_constant_p (mode, x);
12235 /* Nonzero if the constant value X is a legitimate general operand
12236 when generating PIC code. It is given that flag_pic is on and
12237 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12240 legitimate_pic_operand_p (rtx x)
12244 switch (GET_CODE (x))
12247 inner = XEXP (x, 0);
12248 if (GET_CODE (inner) == PLUS
12249 && CONST_INT_P (XEXP (inner, 1)))
12250 inner = XEXP (inner, 0);
12252 /* Only some unspecs are valid as "constants". */
12253 if (GET_CODE (inner) == UNSPEC)
12254 switch (XINT (inner, 1))
12257 case UNSPEC_GOTOFF:
12258 case UNSPEC_PLTOFF:
12259 return TARGET_64BIT;
12261 x = XVECEXP (inner, 0, 0);
12262 return (GET_CODE (x) == SYMBOL_REF
12263 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12264 case UNSPEC_MACHOPIC_OFFSET:
12265 return legitimate_pic_address_disp_p (x);
12273 return legitimate_pic_address_disp_p (x);
12280 /* Determine if a given CONST RTX is a valid memory displacement
12281 in PIC mode. */
12284 legitimate_pic_address_disp_p (rtx disp)
12288 /* In 64bit mode we can allow direct addresses of symbols and labels
12289 when they are not dynamic symbols. */
12292 rtx op0 = disp, op1;
12294 switch (GET_CODE (disp))
12300 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12302 op0 = XEXP (XEXP (disp, 0), 0);
12303 op1 = XEXP (XEXP (disp, 0), 1);
12304 if (!CONST_INT_P (op1)
12305 || INTVAL (op1) >= 16*1024*1024
12306 || INTVAL (op1) < -16*1024*1024)
12308 if (GET_CODE (op0) == LABEL_REF)
12310 if (GET_CODE (op0) == CONST
12311 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12312 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12314 if (GET_CODE (op0) == UNSPEC
12315 && XINT (op0, 1) == UNSPEC_PCREL)
12317 if (GET_CODE (op0) != SYMBOL_REF)
12322 /* TLS references should always be enclosed in UNSPEC. */
12323 if (SYMBOL_REF_TLS_MODEL (op0))
12325 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12326 && ix86_cmodel != CM_LARGE_PIC)
12334 if (GET_CODE (disp) != CONST)
12336 disp = XEXP (disp, 0);
12340 /* It is unsafe to allow PLUS expressions here; this limits the allowed
12341 distance of GOT table references. We should not need these anyway. */
12342 if (GET_CODE (disp) != UNSPEC
12343 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12344 && XINT (disp, 1) != UNSPEC_GOTOFF
12345 && XINT (disp, 1) != UNSPEC_PCREL
12346 && XINT (disp, 1) != UNSPEC_PLTOFF))
12349 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12350 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12356 if (GET_CODE (disp) == PLUS)
12358 if (!CONST_INT_P (XEXP (disp, 1)))
12360 disp = XEXP (disp, 0);
12364 if (TARGET_MACHO && darwin_local_data_pic (disp))
12367 if (GET_CODE (disp) != UNSPEC)
12370 switch (XINT (disp, 1))
12375 /* We need to check for both symbols and labels because VxWorks loads
12376 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12377 details. */
12378 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12379 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12380 case UNSPEC_GOTOFF:
12381 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12382 While the ABI also specifies a 32bit relocation, we don't produce it in
12383 the small PIC model at all. */
12384 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12385 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12387 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12389 case UNSPEC_GOTTPOFF:
12390 case UNSPEC_GOTNTPOFF:
12391 case UNSPEC_INDNTPOFF:
12394 disp = XVECEXP (disp, 0, 0);
12395 return (GET_CODE (disp) == SYMBOL_REF
12396 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12397 case UNSPEC_NTPOFF:
12398 disp = XVECEXP (disp, 0, 0);
12399 return (GET_CODE (disp) == SYMBOL_REF
12400 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12401 case UNSPEC_DTPOFF:
12402 disp = XVECEXP (disp, 0, 0);
12403 return (GET_CODE (disp) == SYMBOL_REF
12404 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12410 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12411 replace the input X, or the original X if no replacement is called for.
12412 The output parameter *WIN is 1 if the calling macro should goto WIN,
12413 0 if it should not. */
12416 ix86_legitimize_reload_address (rtx x,
12417 enum machine_mode mode ATTRIBUTE_UNUSED,
12418 int opnum, int type,
12419 int ind_levels ATTRIBUTE_UNUSED)
12421 /* Reload can generate:
12423 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12427 This RTX is rejected by ix86_legitimate_address_p due to
12428 non-strictness of base register 97. Following this rejection,
12429 reload pushes all three components into separate registers,
12430 creating an invalid memory address RTX.
12432 The following code reloads only the invalid part of the
12433 memory address RTX. */
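/* Illustrative shape (assumed for exposition): given

     (plus:DI (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
                       (reg:DI 97))
              (reg:DI 2))

   only the inner base (reg:DI 97) and/or the outer index (reg:DI 2)
   are pushed for reload below when they fail the strict predicates;
   the UNSPEC_TP component is left untouched.  */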
12435 if (GET_CODE (x) == PLUS
12436 && REG_P (XEXP (x, 1))
12437 && GET_CODE (XEXP (x, 0)) == PLUS
12438 && REG_P (XEXP (XEXP (x, 0), 1)))
12441 bool something_reloaded = false;
12443 base = XEXP (XEXP (x, 0), 1);
12444 if (!REG_OK_FOR_BASE_STRICT_P (base))
12446 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12447 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12448 opnum, (enum reload_type) type);
12449 something_reloaded = true;
12452 index = XEXP (x, 1);
12453 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12455 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12456 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12457 opnum, (enum reload_type) type);
12458 something_reloaded = true;
12461 gcc_assert (something_reloaded);
12468 /* Recognizes RTL expressions that are valid memory addresses for an
12469 instruction. The MODE argument is the machine mode for the MEM
12470 expression that wants to use this address.
12472 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12473 convert common non-canonical forms to canonical form so that they will
12474 be recognized. */
12477 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12478 rtx addr, bool strict)
12480 struct ix86_address parts;
12481 rtx base, index, disp;
12482 HOST_WIDE_INT scale;
12484 if (ix86_decompose_address (addr, &parts) <= 0)
12485 /* Decomposition failed. */
12489 index = parts.index;
12491 scale = parts.scale;
12493 /* Validate base register. */
12500 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
12501 reg = SUBREG_REG (base);
12503 /* Base is not a register. */
12506 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
12509 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12510 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12511 /* Base is not valid. */
12515 /* Validate index register. */
12522 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
12523 reg = SUBREG_REG (index);
12525 /* Index is not a register. */
12528 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
12531 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12532 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12533 /* Index is not valid. */
12537 /* Index and base should have the same mode. */
12539 && GET_MODE (base) != GET_MODE (index))
12542 /* Validate scale factor. */
12546 /* Scale without index. */
12549 if (scale != 2 && scale != 4 && scale != 8)
12550 /* Scale is not a valid multiplier. */
12554 /* Validate displacement. */
12557 if (GET_CODE (disp) == CONST
12558 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12559 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12560 switch (XINT (XEXP (disp, 0), 1))
12562 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12563 used. While the ABI also specifies 32bit relocations, we don't produce
12564 them at all and use IP-relative addressing instead. */
12566 case UNSPEC_GOTOFF:
12567 gcc_assert (flag_pic);
12569 goto is_legitimate_pic;
12571 /* 64bit address unspec. */
12574 case UNSPEC_GOTPCREL:
12576 gcc_assert (flag_pic);
12577 goto is_legitimate_pic;
12579 case UNSPEC_GOTTPOFF:
12580 case UNSPEC_GOTNTPOFF:
12581 case UNSPEC_INDNTPOFF:
12582 case UNSPEC_NTPOFF:
12583 case UNSPEC_DTPOFF:
12586 case UNSPEC_STACK_CHECK:
12587 gcc_assert (flag_split_stack);
12591 /* Invalid address unspec. */
12595 else if (SYMBOLIC_CONST (disp)
12599 && MACHOPIC_INDIRECT
12600 && !machopic_operand_p (disp)
12606 if (TARGET_64BIT && (index || base))
12608 /* foo@dtpoff(%rX) is ok. */
12609 if (GET_CODE (disp) != CONST
12610 || GET_CODE (XEXP (disp, 0)) != PLUS
12611 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12612 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12613 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12614 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12615 /* Non-constant pic memory reference. */
12618 else if ((!TARGET_MACHO || flag_pic)
12619 && ! legitimate_pic_address_disp_p (disp))
12620 /* Displacement is an invalid pic construct. */
12623 else if (MACHO_DYNAMIC_NO_PIC_P
12624 && !ix86_legitimate_constant_p (Pmode, disp))
12625 /* Displacement must be referenced via non_lazy_pointer. */
12629 /* This code used to verify that a symbolic pic displacement
12630 includes the pic_offset_table_rtx register.
12632 While this is a good idea, unfortunately these constructs may
12633 be created by the "adds using lea" optimization for incorrect
12634 code like:
12642 This code is nonsensical, but results in addressing the
12643 GOT table with the pic_offset_table_rtx base. We can't
12644 just refuse it easily, since it gets matched by the
12645 "addsi3" pattern, which later gets split to lea in case
12646 the output register differs from the input. While this
12647 could be handled by a separate addsi pattern for this case
12648 that never results in lea, disabling this test seems to be
12649 the easier and correct fix for the crash. */
12651 else if (GET_CODE (disp) != LABEL_REF
12652 && !CONST_INT_P (disp)
12653 && (GET_CODE (disp) != CONST
12654 || !ix86_legitimate_constant_p (Pmode, disp))
12655 && (GET_CODE (disp) != SYMBOL_REF
12656 || !ix86_legitimate_constant_p (Pmode, disp)))
12657 /* Displacement is not constant. */
12659 else if (TARGET_64BIT
12660 && !x86_64_immediate_operand (disp, VOIDmode))
12661 /* Displacement is out of range. */
12665 /* Everything looks valid. */
12669 /* Determine if a given RTX is a valid constant address. */
12672 constant_address_p (rtx x)
12674 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12677 /* Return a unique alias set for the GOT. */
12679 static alias_set_type
12680 ix86_GOT_alias_set (void)
12682 static alias_set_type set = -1;
12684 set = new_alias_set ();
12688 /* Return a legitimate reference for ORIG (an address) using the
12689 register REG. If REG is 0, a new pseudo is generated.
12691 There are two types of references that must be handled:
12693 1. Global data references must load the address from the GOT, via
12694 the PIC reg. An insn is emitted to do this load, and the reg is
12697 2. Static data references, constant pool addresses, and code labels
12698 compute the address as an offset from the GOT, whose base is in
12699 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12700 differentiate them from global data objects. The returned
12701 address is the PIC reg + an unspec constant.
12703 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12704 reg also appears in the address. */
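/* As a hedged illustration of the two cases (standard ia32 PIC
   sequences, not taken verbatim from this function): with %ebx
   holding the PIC base, a global object is reached through the GOT,

     movl global@GOT(%ebx), %eax    # load the address from its GOT slot

   while static data is a constant offset from the base,

     leal local@GOTOFF(%ebx), %eax  # PIC reg + unspec constant

   matching the UNSPEC_GOT and UNSPEC_GOTOFF forms built below.  */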
12707 legitimize_pic_address (rtx orig, rtx reg)
12710 rtx new_rtx = orig;
12714 if (TARGET_MACHO && !TARGET_64BIT)
12717 reg = gen_reg_rtx (Pmode);
12718 /* Use the generic Mach-O PIC machinery. */
12719 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12723 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12725 else if (TARGET_64BIT
12726 && ix86_cmodel != CM_SMALL_PIC
12727 && gotoff_operand (addr, Pmode))
12730 /* This symbol may be referenced via a displacement from the PIC
12731 base address (@GOTOFF). */
12733 if (reload_in_progress)
12734 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12735 if (GET_CODE (addr) == CONST)
12736 addr = XEXP (addr, 0);
12737 if (GET_CODE (addr) == PLUS)
12739 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12741 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12744 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12745 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12747 tmpreg = gen_reg_rtx (Pmode);
12750 emit_move_insn (tmpreg, new_rtx);
12754 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12755 tmpreg, 1, OPTAB_DIRECT);
12758 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12760 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12762 /* This symbol may be referenced via a displacement from the PIC
12763 base address (@GOTOFF). */
12765 if (reload_in_progress)
12766 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12767 if (GET_CODE (addr) == CONST)
12768 addr = XEXP (addr, 0);
12769 if (GET_CODE (addr) == PLUS)
12771 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12773 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12776 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12777 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12778 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12782 emit_move_insn (reg, new_rtx);
12786 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12787 /* We can't use @GOTOFF for text labels on VxWorks;
12788 see gotoff_operand. */
12789 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12791 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12793 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12794 return legitimize_dllimport_symbol (addr, true);
12795 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12796 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12797 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12799 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12800 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12804 /* For x64 PE-COFF there is no GOT table. So we use address
12805 directly. */
12806 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12808 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12809 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12812 reg = gen_reg_rtx (Pmode);
12813 emit_move_insn (reg, new_rtx);
12816 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12818 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12819 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12820 new_rtx = gen_const_mem (Pmode, new_rtx);
12821 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12824 reg = gen_reg_rtx (Pmode);
12825 /* Use gen_movsi directly; otherwise the address is loaded
12826 into a register for CSE. We don't want to CSE these addresses;
12827 instead we CSE addresses from the GOT table, so skip this. */
12828 emit_insn (gen_movsi (reg, new_rtx));
12833 /* This symbol must be referenced via a load from the
12834 Global Offset Table (@GOT). */
12836 if (reload_in_progress)
12837 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12838 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12839 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12841 new_rtx = force_reg (Pmode, new_rtx);
12842 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12843 new_rtx = gen_const_mem (Pmode, new_rtx);
12844 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12847 reg = gen_reg_rtx (Pmode);
12848 emit_move_insn (reg, new_rtx);
12854 if (CONST_INT_P (addr)
12855 && !x86_64_immediate_operand (addr, VOIDmode))
12859 emit_move_insn (reg, addr);
12863 new_rtx = force_reg (Pmode, addr);
12865 else if (GET_CODE (addr) == CONST)
12867 addr = XEXP (addr, 0);
12869 /* We must match stuff we generate before. Assume the only
12870 unspecs that can get here are ours. Not that we could do
12871 anything with them anyway.... */
12872 if (GET_CODE (addr) == UNSPEC
12873 || (GET_CODE (addr) == PLUS
12874 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12876 gcc_assert (GET_CODE (addr) == PLUS);
12878 if (GET_CODE (addr) == PLUS)
12880 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12882 /* Check first to see if this is a constant offset from a @GOTOFF
12883 symbol reference. */
12884 if (gotoff_operand (op0, Pmode)
12885 && CONST_INT_P (op1))
12889 if (reload_in_progress)
12890 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12891 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12893 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12894 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12895 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12899 emit_move_insn (reg, new_rtx);
12905 if (INTVAL (op1) < -16*1024*1024
12906 || INTVAL (op1) >= 16*1024*1024)
12908 if (!x86_64_immediate_operand (op1, Pmode))
12909 op1 = force_reg (Pmode, op1);
12910 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12916 base = legitimize_pic_address (XEXP (addr, 0), reg);
12917 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12918 base == reg ? NULL_RTX : reg);
12920 if (CONST_INT_P (new_rtx))
12921 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
12924 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12926 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12927 new_rtx = XEXP (new_rtx, 1);
12929 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12937 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12940 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
12942 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12944 if (GET_MODE (tp) != tp_mode)
12946 gcc_assert (GET_MODE (tp) == SImode);
12947 gcc_assert (tp_mode == DImode);
12949 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12953 tp = copy_to_mode_reg (tp_mode, tp);
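/* For context (standard TLS conventions, stated here as background
   rather than taken from this file): the UNSPEC_TP above materializes
   the thread pointer, which is read through a segment register, e.g.

     movl %gs:0, %eax      # 32-bit Linux
     movq %fs:0, %rax      # 64-bit Linux

   so the SImode-to-DImode zero extension is only needed for x32.  */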
12958 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12960 static GTY(()) rtx ix86_tls_symbol;
12963 ix86_tls_get_addr (void)
12965 if (!ix86_tls_symbol)
12968 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12969 ? "___tls_get_addr" : "__tls_get_addr");
12971 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12974 return ix86_tls_symbol;
12977 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12979 static GTY(()) rtx ix86_tls_module_base_symbol;
12982 ix86_tls_module_base (void)
12984 if (!ix86_tls_module_base_symbol)
12986 ix86_tls_module_base_symbol
12987 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12989 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12990 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12993 return ix86_tls_module_base_symbol;
12996 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12997 false if we expect this to be used for a memory address and true if
12998 we expect to load the address into a register. */
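/* A rough map from TLS models to the code shapes emitted below, as a
   hedged sketch (the canonical ELF TLS sequences; the exact insns
   depend on the target flags tested in each case):

     global-dynamic: call __tls_get_addr on x@tlsgd        -> address
     local-dynamic:  one call for the module base, then add x@dtpoff
     initial-exec:   load x@gottpoff from the GOT, add to the TP
     local-exec:     add the link-time constant x@tpoff/x@ntpoff to TP  */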
13001 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13003 rtx dest, base, off;
13004 rtx pic = NULL_RTX, tp = NULL_RTX;
13005 enum machine_mode tp_mode = Pmode;
13010 case TLS_MODEL_GLOBAL_DYNAMIC:
13011 dest = gen_reg_rtx (Pmode);
13016 pic = pic_offset_table_rtx;
13019 pic = gen_reg_rtx (Pmode);
13020 emit_insn (gen_set_got (pic));
13024 if (TARGET_GNU2_TLS)
13027 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13029 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13031 tp = get_thread_pointer (Pmode, true);
13032 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13034 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13038 rtx caddr = ix86_tls_get_addr ();
13042 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
13045 emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax, x,
13047 insns = get_insns ();
13050 RTL_CONST_CALL_P (insns) = 1;
13051 emit_libcall_block (insns, dest, rax, x);
13054 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13058 case TLS_MODEL_LOCAL_DYNAMIC:
13059 base = gen_reg_rtx (Pmode);
13064 pic = pic_offset_table_rtx;
13067 pic = gen_reg_rtx (Pmode);
13068 emit_insn (gen_set_got (pic));
13072 if (TARGET_GNU2_TLS)
13074 rtx tmp = ix86_tls_module_base ();
13077 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13079 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13081 tp = get_thread_pointer (Pmode, true);
13082 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13083 gen_rtx_MINUS (Pmode, tmp, tp));
13087 rtx caddr = ix86_tls_get_addr ();
13091 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
13094 emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax,
13096 insns = get_insns ();
13099 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13100 share the LD_BASE result with other LD model accesses. */
13101 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13102 UNSPEC_TLS_LD_BASE);
13104 RTL_CONST_CALL_P (insns) = 1;
13105 emit_libcall_block (insns, base, rax, eqv);
13108 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13111 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13112 off = gen_rtx_CONST (Pmode, off);
13114 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13116 if (TARGET_GNU2_TLS)
13118 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13120 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13124 case TLS_MODEL_INITIAL_EXEC:
13127 if (TARGET_SUN_TLS && !TARGET_X32)
13129 /* The Sun linker took the AMD64 TLS spec literally
13130 and can only handle %rax as destination of the
13131 initial executable code sequence. */
13133 dest = gen_reg_rtx (DImode);
13134 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13138 /* Generate DImode references to avoid %fs:(%reg32)
13139 problems and the linker IE->LE relaxation bug. */
13142 type = UNSPEC_GOTNTPOFF;
13146 if (reload_in_progress)
13147 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13148 pic = pic_offset_table_rtx;
13149 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13151 else if (!TARGET_ANY_GNU_TLS)
13153 pic = gen_reg_rtx (Pmode);
13154 emit_insn (gen_set_got (pic));
13155 type = UNSPEC_GOTTPOFF;
13160 type = UNSPEC_INDNTPOFF;
13163 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13164 off = gen_rtx_CONST (tp_mode, off);
13166 off = gen_rtx_PLUS (tp_mode, pic, off);
13167 off = gen_const_mem (tp_mode, off);
13168 set_mem_alias_set (off, ix86_GOT_alias_set ());
13170 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13172 base = get_thread_pointer (tp_mode,
13173 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13174 off = force_reg (tp_mode, off);
13175 return gen_rtx_PLUS (tp_mode, base, off);
13179 base = get_thread_pointer (Pmode, true);
13180 dest = gen_reg_rtx (Pmode);
13181 emit_insn (ix86_gen_sub3 (dest, base, off));
13185 case TLS_MODEL_LOCAL_EXEC:
13186 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13187 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13188 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13189 off = gen_rtx_CONST (Pmode, off);
13191 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13193 base = get_thread_pointer (Pmode,
13194 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13195 return gen_rtx_PLUS (Pmode, base, off);
13199 base = get_thread_pointer (Pmode, true);
13200 dest = gen_reg_rtx (Pmode);
13201 emit_insn (ix86_gen_sub3 (dest, base, off));
13206 gcc_unreachable ();
13212 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13213 to symbol DECL. */
13215 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13216 htab_t dllimport_map;
13219 get_dllimport_decl (tree decl)
13221 struct tree_map *h, in;
13224 const char *prefix;
13225 size_t namelen, prefixlen;
13230 if (!dllimport_map)
13231 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13233 in.hash = htab_hash_pointer (decl);
13234 in.base.from = decl;
13235 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13236 h = (struct tree_map *) *loc;
13240 *loc = h = ggc_alloc_tree_map ();
13242 h->base.from = decl;
13243 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13244 VAR_DECL, NULL, ptr_type_node);
13245 DECL_ARTIFICIAL (to) = 1;
13246 DECL_IGNORED_P (to) = 1;
13247 DECL_EXTERNAL (to) = 1;
13248 TREE_READONLY (to) = 1;
13250 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13251 name = targetm.strip_name_encoding (name);
13252 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13253 ? "*__imp_" : "*__imp__";
13254 namelen = strlen (name);
13255 prefixlen = strlen (prefix);
13256 imp_name = (char *) alloca (namelen + prefixlen + 1);
13257 memcpy (imp_name, prefix, prefixlen);
13258 memcpy (imp_name + prefixlen, name, namelen + 1);
13260 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13261 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13262 SET_SYMBOL_REF_DECL (rtl, to);
13263 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
13265 rtl = gen_const_mem (Pmode, rtl);
13266 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13268 SET_DECL_RTL (to, rtl);
13269 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13274 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13275 true if we require the result be a register. */
13278 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13283 gcc_assert (SYMBOL_REF_DECL (symbol));
13284 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
13286 x = DECL_RTL (imp_decl);
13288 x = force_reg (Pmode, x);
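/* Usage sketch (standard PE dllimport behavior, not specific to this
   file): a reference to a dllimport'ed foo becomes a load through the
   import-table slot created above, roughly

     movl __imp__foo, %eax    # fetch &foo from the import table
     movl (%eax), %eax        # then access foo itself

   which is why DECL_RTL of the __imp_ decl is a gen_const_mem.  */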
13292 /* Try machine-dependent ways of modifying an illegitimate address
13293 to be legitimate. If we find one, return the new, valid address.
13294 This macro is used in only one place: `memory_address' in explow.c.
13296 OLDX is the address as it was before break_out_memory_refs was called.
13297 In some cases it is useful to look at this to decide what needs to be done.
13299 It is always safe for this macro to do nothing. It exists to recognize
13300 opportunities to optimize the output.
13302 For the 80386, we handle X+REG by loading X into a register R and
13303 using R+REG. R will go in a general reg and indexing will be used.
13304 However, if REG is a broken-out memory address or multiplication,
13305 nothing needs to be done because REG can certainly go in a general reg.
13307 When -fpic is used, special handling is needed for symbolic references.
13308 See comments by legitimize_pic_address in i386.c for details. */
13311 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13312 enum machine_mode mode)
13317 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13319 return legitimize_tls_address (x, (enum tls_model) log, false);
13320 if (GET_CODE (x) == CONST
13321 && GET_CODE (XEXP (x, 0)) == PLUS
13322 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13323 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13325 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13326 (enum tls_model) log, false);
13327 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13330 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13332 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
13333 return legitimize_dllimport_symbol (x, true);
13334 if (GET_CODE (x) == CONST
13335 && GET_CODE (XEXP (x, 0)) == PLUS
13336 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13337 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
13339 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
13340 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13344 if (flag_pic && SYMBOLIC_CONST (x))
13345 return legitimize_pic_address (x, 0);
13348 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13349 return machopic_indirect_data_reference (x, 0);
13352 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13353 if (GET_CODE (x) == ASHIFT
13354 && CONST_INT_P (XEXP (x, 1))
13355 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13358 log = INTVAL (XEXP (x, 1));
13359 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13360 GEN_INT (1 << log));
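/* E.g. (ashift (reg) (const_int 3)) has just become
   (mult (reg) (const_int 8)), the canonical scaled-index form that
   ix86_decompose_address expects.  */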
13363 if (GET_CODE (x) == PLUS)
13365 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13367 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13368 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13369 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13372 log = INTVAL (XEXP (XEXP (x, 0), 1));
13373 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13374 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13375 GEN_INT (1 << log));
13378 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13379 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13380 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13383 log = INTVAL (XEXP (XEXP (x, 1), 1));
13384 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13385 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13386 GEN_INT (1 << log));
13389 /* Put multiply first if it isn't already. */
13390 if (GET_CODE (XEXP (x, 1)) == MULT)
13392 rtx tmp = XEXP (x, 0);
13393 XEXP (x, 0) = XEXP (x, 1);
13398 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13399 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13400 created by virtual register instantiation, register elimination, and
13401 similar optimizations. */
13402 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13405 x = gen_rtx_PLUS (Pmode,
13406 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13407 XEXP (XEXP (x, 1), 0)),
13408 XEXP (XEXP (x, 1), 1));
13412 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13413 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13414 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13415 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13416 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13417 && CONSTANT_P (XEXP (x, 1)))
13420 rtx other = NULL_RTX;
13422 if (CONST_INT_P (XEXP (x, 1)))
13424 constant = XEXP (x, 1);
13425 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13427 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13429 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13430 other = XEXP (x, 1);
13438 x = gen_rtx_PLUS (Pmode,
13439 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13440 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13441 plus_constant (Pmode, other,
13442 INTVAL (constant)));
13446 if (changed && ix86_legitimate_address_p (mode, x, false))
13449 if (GET_CODE (XEXP (x, 0)) == MULT)
13452 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13455 if (GET_CODE (XEXP (x, 1)) == MULT)
13458 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13462 && REG_P (XEXP (x, 1))
13463 && REG_P (XEXP (x, 0)))
13466 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13469 x = legitimize_pic_address (x, 0);
13472 if (changed && ix86_legitimate_address_p (mode, x, false))
13475 if (REG_P (XEXP (x, 0)))
13477 rtx temp = gen_reg_rtx (Pmode);
13478 rtx val = force_operand (XEXP (x, 1), temp);
13481 if (GET_MODE (val) != Pmode)
13482 val = convert_to_mode (Pmode, val, 1);
13483 emit_move_insn (temp, val);
13486 XEXP (x, 1) = temp;
13490 else if (REG_P (XEXP (x, 1)))
13492 rtx temp = gen_reg_rtx (Pmode);
13493 rtx val = force_operand (XEXP (x, 0), temp);
13496 if (GET_MODE (val) != Pmode)
13497 val = convert_to_mode (Pmode, val, 1);
13498 emit_move_insn (temp, val);
13501 XEXP (x, 0) = temp;
13509 /* Print an integer constant expression in assembler syntax. Addition
13510 and subtraction are the only arithmetic that may appear in these
13511 expressions. FILE is the stdio stream to write to, X is the rtx, and
13512 CODE is the operand print code from the output string. */
13515 output_pic_addr_const (FILE *file, rtx x, int code)
13519 switch (GET_CODE (x))
13522 gcc_assert (flag_pic);
13527 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13528 output_addr_const (file, x);
13531 const char *name = XSTR (x, 0);
13533 /* Mark the decl as referenced so that cgraph will
13534 output the function. */
13535 if (SYMBOL_REF_DECL (x))
13536 mark_decl_referenced (SYMBOL_REF_DECL (x));
13539 if (MACHOPIC_INDIRECT
13540 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13541 name = machopic_indirection_name (x, /*stub_p=*/true);
13543 assemble_name (file, name);
13545 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13546 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13547 fputs ("@PLT", file);
13554 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13555 assemble_name (asm_out_file, buf);
13559 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13563 /* This used to output parentheses around the expression,
13564 but that does not work on the 386 (either ATT or BSD assembler). */
13565 output_pic_addr_const (file, XEXP (x, 0), code);
13569 if (GET_MODE (x) == VOIDmode)
13571 /* We can use %d if the number is <32 bits and positive. */
13572 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13573 fprintf (file, "0x%lx%08lx",
13574 (unsigned long) CONST_DOUBLE_HIGH (x),
13575 (unsigned long) CONST_DOUBLE_LOW (x));
13577 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13580 /* We can't handle floating point constants;
13581 TARGET_PRINT_OPERAND must handle them. */
13582 output_operand_lossage ("floating constant misused");
13586 /* Some assemblers need integer constants to appear first. */
13587 if (CONST_INT_P (XEXP (x, 0)))
13589 output_pic_addr_const (file, XEXP (x, 0), code);
13591 output_pic_addr_const (file, XEXP (x, 1), code);
13595 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13596 output_pic_addr_const (file, XEXP (x, 1), code);
13598 output_pic_addr_const (file, XEXP (x, 0), code);
13604 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13605 output_pic_addr_const (file, XEXP (x, 0), code);
13607 output_pic_addr_const (file, XEXP (x, 1), code);
13609 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13613 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13615 bool f = i386_asm_output_addr_const_extra (file, x);
13620 gcc_assert (XVECLEN (x, 0) == 1);
13621 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13622 switch (XINT (x, 1))
13625 fputs ("@GOT", file);
13627 case UNSPEC_GOTOFF:
13628 fputs ("@GOTOFF", file);
13630 case UNSPEC_PLTOFF:
13631 fputs ("@PLTOFF", file);
13634 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13635 "(%rip)" : "[rip]", file);
13637 case UNSPEC_GOTPCREL:
13638 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13639 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13641 case UNSPEC_GOTTPOFF:
13642 /* FIXME: This might be @TPOFF in Sun ld too. */
13643 fputs ("@gottpoff", file);
13646 fputs ("@tpoff", file);
13648 case UNSPEC_NTPOFF:
13650 fputs ("@tpoff", file);
13652 fputs ("@ntpoff", file);
13654 case UNSPEC_DTPOFF:
13655 fputs ("@dtpoff", file);
13657 case UNSPEC_GOTNTPOFF:
13659 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13660 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13662 fputs ("@gotntpoff", file);
13664 case UNSPEC_INDNTPOFF:
13665 fputs ("@indntpoff", file);
13668 case UNSPEC_MACHOPIC_OFFSET:
13670 machopic_output_function_base_name (file);
13674 output_operand_lossage ("invalid UNSPEC as operand");
13680 output_operand_lossage ("invalid expression as operand");
13684 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13685 We need to emit DTP-relative relocations. */
13687 static void ATTRIBUTE_UNUSED
13688 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13690 fputs (ASM_LONG, file);
13691 output_addr_const (file, x);
13692 fputs ("@dtpoff", file);
13698 fputs (", 0", file);
13701 gcc_unreachable ();
13705 /* Return true if X is a representation of the PIC register. This copes
13706 with calls from ix86_find_base_term, where the register might have
13707 been replaced by a cselib value. */
13710 ix86_pic_register_p (rtx x)
13712 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13713 return (pic_offset_table_rtx
13714 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13716 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13719 /* Helper function for ix86_delegitimize_address.
13720 Attempt to delegitimize TLS local-exec accesses. */
13723 ix86_delegitimize_tls_address (rtx orig_x)
13725 rtx x = orig_x, unspec;
13726 struct ix86_address addr;
13728 if (!TARGET_TLS_DIRECT_SEG_REFS)
13732 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13734 if (ix86_decompose_address (x, &addr) == 0
13735 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13736 || addr.disp == NULL_RTX
13737 || GET_CODE (addr.disp) != CONST)
13739 unspec = XEXP (addr.disp, 0);
13740 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13741 unspec = XEXP (unspec, 0);
13742 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13744 x = XVECEXP (unspec, 0, 0);
13745 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13746 if (unspec != XEXP (addr.disp, 0))
13747 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13750 rtx idx = addr.index;
13751 if (addr.scale != 1)
13752 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13753 x = gen_rtx_PLUS (Pmode, idx, x);
13756 x = gen_rtx_PLUS (Pmode, addr.base, x);
13757 if (MEM_P (orig_x))
13758 x = replace_equiv_address_nv (orig_x, x);
13762 /* In the name of slightly smaller debug output, and to cater to
13763 general assembler lossage, recognize PIC+GOTOFF and turn it back
13764 into a direct symbol reference.
13766 On Darwin, this is necessary to avoid a crash, because Darwin
13767 has a different PIC label for each routine but the DWARF debugging
13768 information is not associated with any particular routine, so it's
13769 necessary to remove references to the PIC label from RTL stored by
13770 the DWARF output code. */
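/* A hedged example of the transformation (shapes assumed from the
   checks below): a reference such as

     (plus:SI (reg:SI bx)
              (const:SI (unspec:SI [(symbol_ref:SI "foo")] UNSPEC_GOTOFF)))

   is turned back into plain (symbol_ref:SI "foo"), with any register
   or constant addend re-applied around the recovered symbol.  */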
13773 ix86_delegitimize_address (rtx x)
13775 rtx orig_x = delegitimize_mem_from_attrs (x);
13776 /* addend is NULL or some rtx if x is something+GOTOFF where
13777 something doesn't include the PIC register. */
13778 rtx addend = NULL_RTX;
13779 /* reg_addend is NULL or a multiple of some register. */
13780 rtx reg_addend = NULL_RTX;
13781 /* const_addend is NULL or a const_int. */
13782 rtx const_addend = NULL_RTX;
13783 /* This is the result, or NULL. */
13784 rtx result = NULL_RTX;
13793 if (GET_CODE (x) == CONST
13794 && GET_CODE (XEXP (x, 0)) == PLUS
13795 && GET_MODE (XEXP (x, 0)) == Pmode
13796 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13797 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13798 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13800 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13801 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13802 if (MEM_P (orig_x))
13803 x = replace_equiv_address_nv (orig_x, x);
13806 if (GET_CODE (x) != CONST
13807 || GET_CODE (XEXP (x, 0)) != UNSPEC
13808 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13809 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13810 || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
13811 return ix86_delegitimize_tls_address (orig_x);
13812 x = XVECEXP (XEXP (x, 0), 0, 0);
13813 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13815 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13823 if (GET_CODE (x) != PLUS
13824 || GET_CODE (XEXP (x, 1)) != CONST)
13825 return ix86_delegitimize_tls_address (orig_x);
13827 if (ix86_pic_register_p (XEXP (x, 0)))
13828 /* %ebx + GOT/GOTOFF */
13830 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13832 /* %ebx + %reg * scale + GOT/GOTOFF */
13833 reg_addend = XEXP (x, 0);
13834 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13835 reg_addend = XEXP (reg_addend, 1);
13836 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13837 reg_addend = XEXP (reg_addend, 0);
13840 reg_addend = NULL_RTX;
13841 addend = XEXP (x, 0);
13845 addend = XEXP (x, 0);
13847 x = XEXP (XEXP (x, 1), 0);
13848 if (GET_CODE (x) == PLUS
13849 && CONST_INT_P (XEXP (x, 1)))
13851 const_addend = XEXP (x, 1);
13855 if (GET_CODE (x) == UNSPEC
13856 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13857 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13858 result = XVECEXP (x, 0, 0);
13860 if (TARGET_MACHO && darwin_local_data_pic (x)
13861 && !MEM_P (orig_x))
13862 result = XVECEXP (x, 0, 0);
13865 return ix86_delegitimize_tls_address (orig_x);
13868 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13870 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13873 /* If the rest of the original X doesn't involve the PIC register, add
13874 addend and subtract pic_offset_table_rtx. This can happen e.g.
13876 leal (%ebx, %ecx, 4), %ecx
13878 movl foo@GOTOFF(%ecx), %edx
13879 in which case we return (%ecx - %ebx) + foo. */
13880 if (pic_offset_table_rtx)
13881 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13882 pic_offset_table_rtx),
13887 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13889 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13890 if (result == NULL_RTX)
13896 /* If X is a machine specific address (i.e. a symbol or label being
13897 referenced as a displacement from the GOT implemented using an
13898 UNSPEC), then return the base term. Otherwise return X. */
13901 ix86_find_base_term (rtx x)
13907 if (GET_CODE (x) != CONST)
13909 term = XEXP (x, 0);
13910 if (GET_CODE (term) == PLUS
13911 && (CONST_INT_P (XEXP (term, 1))
13912 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13913 term = XEXP (term, 0);
13914 if (GET_CODE (term) != UNSPEC
13915 || (XINT (term, 1) != UNSPEC_GOTPCREL
13916 && XINT (term, 1) != UNSPEC_PCREL))
13919 return XVECEXP (term, 0, 0);
13922 return ix86_delegitimize_address (x);
13926 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
13927 bool fp, FILE *file)
13929 const char *suffix;
13931 if (mode == CCFPmode || mode == CCFPUmode)
13933 code = ix86_fp_compare_code_to_integer (code);
13937 code = reverse_condition (code);
13988 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13992 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13993 Those same assemblers have the same but opposite lossage on cmov. */
13994 if (mode == CCmode)
13995 suffix = fp ? "nbe" : "a";
13996 else if (mode == CCCmode)
13999 gcc_unreachable ();
14015 gcc_unreachable ();
14019 gcc_assert (mode == CCmode || mode == CCCmode);
14036 gcc_unreachable ();
14040 /* ??? As above. */
14041 gcc_assert (mode == CCmode || mode == CCCmode);
14042 suffix = fp ? "nb" : "ae";
14045 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14049 /* ??? As above. */
14050 if (mode == CCmode)
14052 else if (mode == CCCmode)
14053 suffix = fp ? "nb" : "ae";
14055 gcc_unreachable ();
14058 suffix = fp ? "u" : "p";
14061 suffix = fp ? "nu" : "np";
14064 gcc_unreachable ();
14066 fputs (suffix, file);
14069 /* Print the name of register X to FILE based on its machine mode and number.
14070 If CODE is 'w', pretend the mode is HImode.
14071 If CODE is 'b', pretend the mode is QImode.
14072 If CODE is 'k', pretend the mode is SImode.
14073 If CODE is 'q', pretend the mode is DImode.
14074 If CODE is 'x', pretend the mode is V4SFmode.
14075 If CODE is 't', pretend the mode is V8SFmode.
14076 If CODE is 'h', pretend the reg is the 'high' byte register.
14077 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14078 If CODE is 'd', duplicate the operand for AVX instruction.
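   For instance (an illustrative case, assuming the usual register
   naming): given the rtx for the ax register, code 'b' prints "al",
   'w' prints "ax", 'k' prints "eax", 'q' prints "rax" and 'h'
   prints "ah".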
14082 print_reg (rtx x, int code, FILE *file)
14085 bool duplicated = code == 'd' && TARGET_AVX;
14087 gcc_assert (x == pc_rtx
14088 || (REGNO (x) != ARG_POINTER_REGNUM
14089 && REGNO (x) != FRAME_POINTER_REGNUM
14090 && REGNO (x) != FLAGS_REG
14091 && REGNO (x) != FPSR_REG
14092 && REGNO (x) != FPCR_REG));
14094 if (ASSEMBLER_DIALECT == ASM_ATT)
14099 gcc_assert (TARGET_64BIT);
14100 fputs ("rip", file);
14104 if (code == 'w' || MMX_REG_P (x))
14106 else if (code == 'b')
14108 else if (code == 'k')
14110 else if (code == 'q')
14112 else if (code == 'y')
14114 else if (code == 'h')
14116 else if (code == 'x')
14118 else if (code == 't')
14121 code = GET_MODE_SIZE (GET_MODE (x));
14123 /* Irritatingly, AMD extended registers use a different naming convention
14124 from the normal registers: "r%d[bwd]". */
14125 if (REX_INT_REG_P (x))
14127 gcc_assert (TARGET_64BIT);
14129 fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
14133 error ("extended registers have no high halves");
14148 error ("unsupported operand size for extended register");
14158 if (STACK_TOP_P (x))
14167 if (! ANY_FP_REG_P (x))
14168 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14173 reg = hi_reg_name[REGNO (x)];
14176 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
14178 reg = qi_reg_name[REGNO (x)];
14181 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
14183 reg = qi_high_reg_name[REGNO (x)];
14188 gcc_assert (!duplicated);
14190 fputs (hi_reg_name[REGNO (x)] + 1, file);
14195 gcc_unreachable ();
14201 if (ASSEMBLER_DIALECT == ASM_ATT)
14202 fprintf (file, ", %%%s", reg);
14204 fprintf (file, ", %s", reg);
14208 /* Locate some local-dynamic symbol still in use by this function
14209 so that we can print its name in some tls_local_dynamic_base
14210 pattern. */
14213 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14217 if (GET_CODE (x) == SYMBOL_REF
14218 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14220 cfun->machine->some_ld_name = XSTR (x, 0);
14227 static const char *
14228 get_some_local_dynamic_name (void)
14232 if (cfun->machine->some_ld_name)
14233 return cfun->machine->some_ld_name;
14235 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14236 if (NONDEBUG_INSN_P (insn)
14237 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14238 return cfun->machine->some_ld_name;
14243 /* Meaning of CODE:
14244 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14245 C -- print opcode suffix for set/cmov insn.
14246 c -- like C, but print reversed condition
14247 F,f -- likewise, but for floating-point.
14248 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14250 R -- print the prefix for register names.
14251 z -- print the opcode suffix for the size of the current operand.
14252 Z -- likewise, with special suffixes for x87 instructions.
14253 * -- print a star (in certain assembler syntax)
14254 A -- print an absolute memory reference.
14255 E -- print address with DImode register names if TARGET_64BIT.
14256 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14257 s -- print a shift double count, followed by the assembler's argument
14258 delimiter.
14259 b -- print the QImode name of the register for the indicated operand.
14260 %b0 would print %al if operands[0] is reg 0.
14261 w -- likewise, print the HImode name of the register.
14262 k -- likewise, print the SImode name of the register.
14263 q -- likewise, print the DImode name of the register.
14264 x -- likewise, print the V4SFmode name of the register.
14265 t -- likewise, print the V8SFmode name of the register.
14266 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14267 y -- print "st(0)" instead of "st" as a register.
14268 d -- print duplicated register operand for AVX instruction.
14269 D -- print condition for SSE cmp instruction.
14270 P -- if PIC, print an @PLT suffix.
14271 p -- print raw symbol name.
14272 X -- don't print any sort of PIC '@' suffix for a symbol.
14273 & -- print some in-use local-dynamic symbol name.
14274 H -- print a memory address offset by 8; used for sse high-parts
14275 Y -- print condition for XOP pcom* instruction.
14276 + -- print a branch hint as 'cs' or 'ds' prefix
14277 ; -- print a semicolon (after prefixes, due to a bug in older gas).
14278 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14279 @ -- print a segment register of thread base pointer load
14280 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
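   Illustrative use from an insn template (a hypothetical pattern, but
   with the modifier semantics listed above): if operand 0 is a DImode
   register such as %rax, then in

     "mov%z0\t{%1, %0|%0, %1}"

   %z0 expands to the size suffix "q", "%k0" would print "eax", and
   the {att|intel} halves are selected by ASSEMBLER_DIALECT.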
14284 ix86_print_operand (FILE *file, rtx x, int code)
14291 switch (ASSEMBLER_DIALECT)
14298 /* Intel syntax. For absolute addresses, registers should not
14299 be surrounded by braces. */
14303 ix86_print_operand (file, x, 0);
14310 gcc_unreachable ();
14313 ix86_print_operand (file, x, 0);
14317 /* Wrap address in an UNSPEC to declare special handling. */
14319 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14321 output_address (x);
14325 if (ASSEMBLER_DIALECT == ASM_ATT)
14330 if (ASSEMBLER_DIALECT == ASM_ATT)
14335 if (ASSEMBLER_DIALECT == ASM_ATT)
14340 if (ASSEMBLER_DIALECT == ASM_ATT)
14345 if (ASSEMBLER_DIALECT == ASM_ATT)
14350 if (ASSEMBLER_DIALECT == ASM_ATT)
14355 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14356 if (ASSEMBLER_DIALECT != ASM_ATT)
14359 switch (GET_MODE_SIZE (GET_MODE (x)))
14374 output_operand_lossage
14375 ("invalid operand size for operand code 'O'");
14384 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14386 /* Opcodes don't get size suffixes if using Intel opcodes. */
14387 if (ASSEMBLER_DIALECT == ASM_INTEL)
14390 switch (GET_MODE_SIZE (GET_MODE (x)))
14409 output_operand_lossage
14410 ("invalid operand size for operand code 'z'");
14415 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14417 (0, "non-integer operand used with operand code 'z'");
14421 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14422 if (ASSEMBLER_DIALECT == ASM_INTEL)
14425 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14427 switch (GET_MODE_SIZE (GET_MODE (x)))
14430 #ifdef HAVE_AS_IX86_FILDS
14440 #ifdef HAVE_AS_IX86_FILDQ
14443 fputs ("ll", file);
14451 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14453 /* 387 opcodes don't get size suffixes
14454 if the operands are registers. */
14455 if (STACK_REG_P (x))
14458 switch (GET_MODE_SIZE (GET_MODE (x)))
14479 output_operand_lossage
14480 ("invalid operand type used with operand code 'Z'");
14484 output_operand_lossage
14485 ("invalid operand size for operand code 'Z'");
14503 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14505 ix86_print_operand (file, x, 0);
14506 fputs (", ", file);
14511 switch (GET_CODE (x))
14514 fputs ("neq", file);
14517 fputs ("eq", file);
14521 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14525 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14529 fputs ("le", file);
14533 fputs ("lt", file);
14536 fputs ("unord", file);
14539 fputs ("ord", file);
14542 fputs ("ueq", file);
14545 fputs ("nlt", file);
14548 fputs ("nle", file);
14551 fputs ("ule", file);
14554 fputs ("ult", file);
14557 fputs ("une", file);
14560 output_operand_lossage ("operand is not a condition code, "
14561 "invalid operand code 'Y'");
14567 /* A little bit of braindamage here. The SSE compare instructions
14568 use completely different names for the comparisons than the
14569 fp conditional moves do. */
14570 switch (GET_CODE (x))
14575 fputs ("eq_us", file);
14579 fputs ("eq", file);
14584 fputs ("nge", file);
14588 fputs ("lt", file);
14593 fputs ("ngt", file);
14597 fputs ("le", file);
14600 fputs ("unord", file);
14605 fputs ("neq_oq", file);
14609 fputs ("neq", file);
14614 fputs ("ge", file);
14618 fputs ("nlt", file);
14623 fputs ("gt", file);
14627 fputs ("nle", file);
14630 fputs ("ord", file);
14633 output_operand_lossage ("operand is not a condition code, "
14634 "invalid operand code 'D'");
14641 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14642 if (ASSEMBLER_DIALECT == ASM_ATT)
14648 if (!COMPARISON_P (x))
14650 output_operand_lossage ("operand is not a condition code, "
14651 "invalid operand code '%c'", code);
14654 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14655 code == 'c' || code == 'f',
14656 code == 'F' || code == 'f',
14661 if (!offsettable_memref_p (x))
14663 output_operand_lossage ("operand is not an offsettable memory "
14664 "reference, invalid operand code 'H'");
14667 /* It doesn't actually matter what mode we use here, as we're
14668 only going to use this for printing. */
14669 x = adjust_address_nv (x, DImode, 8);
14673 gcc_assert (CONST_INT_P (x));
14675 if (INTVAL (x) & IX86_HLE_ACQUIRE)
14676 #ifdef HAVE_AS_IX86_HLE
14677 fputs ("xacquire ", file);
14679 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14681 else if (INTVAL (x) & IX86_HLE_RELEASE)
14682 #ifdef HAVE_AS_IX86_HLE
14683 fputs ("xrelease ", file);
14685 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14687 /* We do not want to print the value of the operand. */
14691 if (ASSEMBLER_DIALECT == ASM_ATT)
14697 const char *name = get_some_local_dynamic_name ();
14699 output_operand_lossage ("'%%&' used without any "
14700 "local dynamic TLS references");
14702 assemble_name (file, name);
14711 || optimize_function_for_size_p (cfun)
14712 || !TARGET_BRANCH_PREDICTION_HINTS)
14715 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14718 int pred_val = INTVAL (XEXP (x, 0));
14720 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14721 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14723 bool taken = pred_val > REG_BR_PROB_BASE / 2;
14725 = final_forward_branch_p (current_output_insn) == 0;
14727 /* Emit hints only where the default branch prediction
14728 heuristics would fail. */
14729 if (taken != cputaken)
14731 /* We use 3e (DS) prefix for taken branches and
14732 2e (CS) prefix for not taken branches. */
14734 fputs ("ds ; ", file);
14736 fputs ("cs ; ", file);
14744 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14750 if (ASSEMBLER_DIALECT == ASM_ATT)
14753 /* The kernel uses a different segment register for performance
14754 reasons; a system call would not have to trash the userspace
14755 segment register, which would be expensive. */
14756 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14757 fputs ("fs", file);
14759 fputs ("gs", file);
14763 putc (TARGET_AVX2 ? 'i' : 'f', file);
14767 if (TARGET_64BIT && Pmode != word_mode)
14768 fputs ("addr32 ", file);
14772 output_operand_lossage ("invalid operand code '%c'", code);
14777 print_reg (x, code, file);
14779 else if (MEM_P (x))
14781 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14782 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14783 && GET_MODE (x) != BLKmode)
14786 switch (GET_MODE_SIZE (GET_MODE (x)))
14788 case 1: size = "BYTE"; break;
14789 case 2: size = "WORD"; break;
14790 case 4: size = "DWORD"; break;
14791 case 8: size = "QWORD"; break;
14792 case 12: size = "TBYTE"; break;
14794 if (GET_MODE (x) == XFmode)
14799 case 32: size = "YMMWORD"; break;
14801 gcc_unreachable ();
14804 /* Check for explicit size override (codes 'b', 'w', 'k',
14808 else if (code == 'w')
14810 else if (code == 'k')
14812 else if (code == 'q')
14814 else if (code == 'x')
14817 fputs (size, file);
14818 fputs (" PTR ", file);
14822 /* Avoid (%rip) for call operands. */
14823 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14824 && !CONST_INT_P (x))
14825 output_addr_const (file, x);
14826 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14827 output_operand_lossage ("invalid constraints for operand");
14829 output_address (x);
14832 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14837 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14838 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14840 if (ASSEMBLER_DIALECT == ASM_ATT)
14842 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14844 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14846 fprintf (file, "0x%08x", (unsigned int) l);
14849 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14854 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14855 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14857 if (ASSEMBLER_DIALECT == ASM_ATT)
14859 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14862 /* These float cases don't actually occur as immediate operands. */
14863 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14867 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14868 fputs (dstr, file);
14873 /* We have patterns that allow zero sets of memory, for instance.
14874 In 64-bit mode, we should probably support all 8-byte vectors,
14875 since we can in fact encode that into an immediate. */
14876 if (GET_CODE (x) == CONST_VECTOR)
14878 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14882 if (code != 'P' && code != 'p')
14884 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14886 if (ASSEMBLER_DIALECT == ASM_ATT)
14889 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14890 || GET_CODE (x) == LABEL_REF)
14892 if (ASSEMBLER_DIALECT == ASM_ATT)
14895 fputs ("OFFSET FLAT:", file);
14898 if (CONST_INT_P (x))
14899 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14900 else if (flag_pic || MACHOPIC_INDIRECT)
14901 output_pic_addr_const (file, x, code);
14903 output_addr_const (file, x);
14908 ix86_print_operand_punct_valid_p (unsigned char code)
14910 return (code == '@' || code == '*' || code == '+' || code == '&'
14911 || code == ';' || code == '~' || code == '^');
14914 /* Print a memory operand whose address is ADDR. */
14917 ix86_print_operand_address (FILE *file, rtx addr)
14919 struct ix86_address parts;
14920 rtx base, index, disp;
14926 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14928 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14929 gcc_assert (parts.index == NULL_RTX);
14930 parts.index = XVECEXP (addr, 0, 1);
14931 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14932 addr = XVECEXP (addr, 0, 0);
14935 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14937 gcc_assert (TARGET_64BIT);
14938 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14942 ok = ix86_decompose_address (addr, &parts);
14946 if (parts.base && GET_CODE (parts.base) == SUBREG)
14948 rtx tmp = SUBREG_REG (parts.base);
14949 parts.base = simplify_subreg (GET_MODE (parts.base),
14950 tmp, GET_MODE (tmp), 0);
14951 gcc_assert (parts.base != NULL_RTX);
14954 if (parts.index && GET_CODE (parts.index) == SUBREG)
14956 rtx tmp = SUBREG_REG (parts.index);
14957 parts.index = simplify_subreg (GET_MODE (parts.index),
14958 tmp, GET_MODE (tmp), 0);
14959 gcc_assert (parts.index != NULL_RTX);
14963 index = parts.index;
14965 scale = parts.scale;
14973 if (ASSEMBLER_DIALECT == ASM_ATT)
14975 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14978 gcc_unreachable ();
14981 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14982 if (TARGET_64BIT && !base && !index)
14986 if (GET_CODE (disp) == CONST
14987 && GET_CODE (XEXP (disp, 0)) == PLUS
14988 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14989 symbol = XEXP (XEXP (disp, 0), 0);
14991 if (GET_CODE (symbol) == LABEL_REF
14992 || (GET_CODE (symbol) == SYMBOL_REF
14993 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14996 if (!base && !index)
14998 /* A displacement-only address requires special attention. */
15000 if (CONST_INT_P (disp))
15002 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15003 fputs ("ds:", file);
15004 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15007 output_pic_addr_const (file, disp, 0);
15009 output_addr_const (file, disp);
15013 /* Print SImode register names to force addr32 prefix. */
15014 if (SImode_address_operand (addr, VOIDmode))
15016 #ifdef ENABLE_CHECKING
15017 gcc_assert (TARGET_64BIT);
15018 switch (GET_CODE (addr))
15021 gcc_assert (GET_MODE (addr) == SImode);
15022 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15026 gcc_assert (GET_MODE (addr) == DImode);
15029 gcc_unreachable ();
15032 gcc_assert (!code);
15036 if (ASSEMBLER_DIALECT == ASM_ATT)
15041 output_pic_addr_const (file, disp, 0);
15042 else if (GET_CODE (disp) == LABEL_REF)
15043 output_asm_label (disp);
15045 output_addr_const (file, disp);
15050 print_reg (base, code, file);
15054 print_reg (index, vsib ? 0 : code, file);
15055 if (scale != 1 || vsib)
15056 fprintf (file, ",%d", scale);
15062 rtx offset = NULL_RTX;
15066 /* Pull out the offset of a symbol; print any symbol itself. */
15067 if (GET_CODE (disp) == CONST
15068 && GET_CODE (XEXP (disp, 0)) == PLUS
15069 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15071 offset = XEXP (XEXP (disp, 0), 1);
15072 disp = gen_rtx_CONST (VOIDmode,
15073 XEXP (XEXP (disp, 0), 0));
15077 output_pic_addr_const (file, disp, 0);
15078 else if (GET_CODE (disp) == LABEL_REF)
15079 output_asm_label (disp);
15080 else if (CONST_INT_P (disp))
15083 output_addr_const (file, disp);
15089 print_reg (base, code, file);
15092 if (INTVAL (offset) >= 0)
15094 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15098 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15105 print_reg (index, vsib ? 0 : code, file);
15106 if (scale != 1 || vsib)
15107 fprintf (file, "*%d", scale);
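/* Illustrative sketch (not from the original source): the two assembler
   dialects handled above render the same effective address differently.
   The hypothetical helper below, assuming a C99 hosted environment,
   shows both spellings of the common base+index*scale+disp case.  */
#if 0
#include <stdio.h>

static void
format_address_sketch (const char *base, const char *index,
                       int scale, long disp)
{
  /* AT&T syntax: disp(%base,%index,scale).  */
  printf ("%ld(%%%s,%%%s,%d)\n", disp, base, index, scale);
  /* Intel syntax: [base+index*scale+disp].  */
  printf ("[%s+%s*%d%+ld]\n", base, index, scale, disp);
}

/* format_address_sketch ("rax", "rbx", 4, 16) prints
   "16(%rax,%rbx,4)" and "[rax+rbx*4+16]".  */
#endif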
15114 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15117 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15121 if (GET_CODE (x) != UNSPEC)
15124 op = XVECEXP (x, 0, 0);
15125 switch (XINT (x, 1))
15127 case UNSPEC_GOTTPOFF:
15128 output_addr_const (file, op);
15129 /* FIXME: This might be @TPOFF in Sun ld. */
15130 fputs ("@gottpoff", file);
15133 output_addr_const (file, op);
15134 fputs ("@tpoff", file);
15136 case UNSPEC_NTPOFF:
15137 output_addr_const (file, op);
15139 fputs ("@tpoff", file);
15141 fputs ("@ntpoff", file);
15143 case UNSPEC_DTPOFF:
15144 output_addr_const (file, op);
15145 fputs ("@dtpoff", file);
15147 case UNSPEC_GOTNTPOFF:
15148 output_addr_const (file, op);
15150 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15151 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15153 fputs ("@gotntpoff", file);
15155 case UNSPEC_INDNTPOFF:
15156 output_addr_const (file, op);
15157 fputs ("@indntpoff", file);
15160 case UNSPEC_MACHOPIC_OFFSET:
15161 output_addr_const (file, op);
15163 machopic_output_function_base_name (file);
15167 case UNSPEC_STACK_CHECK:
15171 gcc_assert (flag_split_stack);
15173 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15174 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15176 gcc_unreachable ();
15179 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15190 /* Split one or more double-mode RTL references into pairs of half-mode
15191 references. The RTL can be REG, offsettable MEM, integer constant, or
15192 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15193 split and "num" is its length. lo_half and hi_half are output arrays
15194 that parallel "operands". */
15197 split_double_mode (enum machine_mode mode, rtx operands[],
15198 int num, rtx lo_half[], rtx hi_half[])
15200 enum machine_mode half_mode;
15206 half_mode = DImode;
15209 half_mode = SImode;
15212 gcc_unreachable ();
15215 byte = GET_MODE_SIZE (half_mode);
15219 rtx op = operands[num];
15221 /* simplify_subreg refuses to split volatile memory addresses,
15222 but we still have to handle them. */
15225 lo_half[num] = adjust_address (op, half_mode, 0);
15226 hi_half[num] = adjust_address (op, half_mode, byte);
15230 lo_half[num] = simplify_gen_subreg (half_mode, op,
15231 GET_MODE (op) == VOIDmode
15232 ? mode : GET_MODE (op), 0);
15233 hi_half[num] = simplify_gen_subreg (half_mode, op,
15234 GET_MODE (op) == VOIDmode
15235 ? mode : GET_MODE (op), byte);
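/* Illustrative sketch (not from the original source): on a little-endian
   target the split above takes the low half at byte offset 0 and the
   high half at byte offset GET_MODE_SIZE (half_mode).  A standalone C
   analogue for splitting a 64-bit constant into SImode halves:  */
#if 0
#include <stdint.h>

static void
split_u64_sketch (uint64_t x, uint32_t *lo, uint32_t *hi)
{
  *lo = (uint32_t) (x & 0xffffffffu); /* byte offset 0 */
  *hi = (uint32_t) (x >> 32);         /* byte offset 4 */
}
#endif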
15240 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15241 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15242 is the expression of the binary operation. The output may either be
15243 emitted here, or returned to the caller, like all output_* functions.
15245 There is no guarantee that the operands are the same mode, as they
15246 might be within FLOAT or FLOAT_EXTEND expressions. */
15248 #ifndef SYSV386_COMPAT
15249 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15250 wants to fix the assemblers because that causes incompatibility
15251 with gcc. No-one wants to fix gcc because that causes
15252 incompatibility with assemblers... You can use the option of
15253 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15254 #define SYSV386_COMPAT 1
15258 output_387_binary_op (rtx insn, rtx *operands)
15260 static char buf[40];
15263 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15265 #ifdef ENABLE_CHECKING
15266 /* Even if we do not want to check the inputs, this documents the input
15267 constraints, which helps in understanding the following code. */
15268 if (STACK_REG_P (operands[0])
15269 && ((REG_P (operands[1])
15270 && REGNO (operands[0]) == REGNO (operands[1])
15271 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15272 || (REG_P (operands[2])
15273 && REGNO (operands[0]) == REGNO (operands[2])
15274 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15275 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15278 gcc_assert (is_sse);
15281 switch (GET_CODE (operands[3]))
15284 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15285 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15293 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15294 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15302 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15303 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15311 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15312 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15320 gcc_unreachable ();
15327 strcpy (buf, ssep);
15328 if (GET_MODE (operands[0]) == SFmode)
15329 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15331 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15335 strcpy (buf, ssep + 1);
15336 if (GET_MODE (operands[0]) == SFmode)
15337 strcat (buf, "ss\t{%2, %0|%0, %2}");
15339 strcat (buf, "sd\t{%2, %0|%0, %2}");
15345 switch (GET_CODE (operands[3]))
15349 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15351 rtx temp = operands[2];
15352 operands[2] = operands[1];
15353 operands[1] = temp;
15356 /* We now know operands[0] == operands[1]. */
15358 if (MEM_P (operands[2]))
15364 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15366 if (STACK_TOP_P (operands[0]))
15367 /* How is it that we are storing to a dead operand[2]?
15368 Well, presumably operands[1] is dead too. We can't
15369 store the result to st(0) as st(0) gets popped on this
15370 instruction. Instead store to operands[2] (which I
15371 think has to be st(1)). st(1) will be popped later.
15372 gcc <= 2.8.1 didn't have this check and generated
15373 assembly code that the Unixware assembler rejected. */
15374 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15376 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15380 if (STACK_TOP_P (operands[0]))
15381 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15383 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15388 if (MEM_P (operands[1]))
15394 if (MEM_P (operands[2]))
15400 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15403 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15404 derived assemblers, confusingly reverse the direction of
15405 the operation for fsub{r} and fdiv{r} when the
15406 destination register is not st(0). The Intel assembler
15407 doesn't have this brain damage. Read !SYSV386_COMPAT to
15408 figure out what the hardware really does. */
15409 if (STACK_TOP_P (operands[0]))
15410 p = "{p\t%0, %2|rp\t%2, %0}";
15412 p = "{rp\t%2, %0|p\t%0, %2}";
15414 if (STACK_TOP_P (operands[0]))
15415 /* As above for fmul/fadd, we can't store to st(0). */
15416 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15418 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15423 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15426 if (STACK_TOP_P (operands[0]))
15427 p = "{rp\t%0, %1|p\t%1, %0}";
15429 p = "{p\t%1, %0|rp\t%0, %1}";
15431 if (STACK_TOP_P (operands[0]))
15432 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15434 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15439 if (STACK_TOP_P (operands[0]))
15441 if (STACK_TOP_P (operands[1]))
15442 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15444 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15447 else if (STACK_TOP_P (operands[1]))
15450 p = "{\t%1, %0|r\t%0, %1}";
15452 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15458 p = "{r\t%2, %0|\t%0, %2}";
15460 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15466 gcc_unreachable ();
15473 /* Return needed mode for entity in optimize_mode_switching pass. */
15476 ix86_mode_needed (int entity, rtx insn)
15478 enum attr_i387_cw mode;
15480 /* The mode UNINITIALIZED is used to store the control word after a
15481 function call or ASM pattern. The mode ANY specifies that the function
15482 has no requirements on the control word and makes no changes to the
15483 bits we are interested in. */
15486 || (NONJUMP_INSN_P (insn)
15487 && (asm_noperands (PATTERN (insn)) >= 0
15488 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15489 return I387_CW_UNINITIALIZED;
15491 if (recog_memoized (insn) < 0)
15492 return I387_CW_ANY;
15494 mode = get_attr_i387_cw (insn);
15499 if (mode == I387_CW_TRUNC)
15504 if (mode == I387_CW_FLOOR)
15509 if (mode == I387_CW_CEIL)
15514 if (mode == I387_CW_MASK_PM)
15519 gcc_unreachable ();
15522 return I387_CW_ANY;
15525 /* Output code to initialize control word copies used by trunc?f?i and
15526 rounding patterns. CURRENT_MODE is set to current control word,
15527 while NEW_MODE is set to new control word. */
15530 emit_i387_cw_initialization (int mode)
15532 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15535 enum ix86_stack_slot slot;
15537 rtx reg = gen_reg_rtx (HImode);
15539 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15540 emit_move_insn (reg, copy_rtx (stored_mode));
15542 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15543 || optimize_function_for_size_p (cfun))
15547 case I387_CW_TRUNC:
15548 /* round toward zero (truncate) */
15549 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15550 slot = SLOT_CW_TRUNC;
15553 case I387_CW_FLOOR:
15554 /* round down toward -oo */
15555 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15556 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15557 slot = SLOT_CW_FLOOR;
15561 /* round up toward +oo */
15562 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15563 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15564 slot = SLOT_CW_CEIL;
15567 case I387_CW_MASK_PM:
15568 /* mask precision exception for nearbyint() */
15569 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15570 slot = SLOT_CW_MASK_PM;
15574 gcc_unreachable ();
15581 case I387_CW_TRUNC:
15582 /* round toward zero (truncate) */
15583 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15584 slot = SLOT_CW_TRUNC;
15587 case I387_CW_FLOOR:
15588 /* round down toward -oo */
15589 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15590 slot = SLOT_CW_FLOOR;
15594 /* round up toward +oo */
15595 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15596 slot = SLOT_CW_CEIL;
15599 case I387_CW_MASK_PM:
15600 /* mask precision exception for nearbyint() */
15601 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15602 slot = SLOT_CW_MASK_PM;
15606 gcc_unreachable ();
15610 gcc_assert (slot < MAX_386_STACK_LOCALS);
15612 new_mode = assign_386_stack_local (HImode, slot);
15613 emit_move_insn (new_mode, reg);
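/* Illustrative sketch (not from the original source): the insns emitted
   above edit the RC (rounding control) field, bits 10-11 of the x87
   control word: 00 = to nearest, 01 = down, 10 = up, 11 = toward zero.
   A standalone C analogue of the HImode and/or variant emitted first:  */
#if 0
#include <stdint.h>

static uint16_t cw_trunc_sketch (uint16_t cw)   /* RC = 11 */
{ return cw | 0x0c00; }
static uint16_t cw_floor_sketch (uint16_t cw)   /* RC = 01 */
{ return (cw & ~0x0c00) | 0x0400; }
static uint16_t cw_ceil_sketch (uint16_t cw)    /* RC = 10 */
{ return (cw & ~0x0c00) | 0x0800; }
static uint16_t cw_mask_pm_sketch (uint16_t cw) /* mask precision exc. */
{ return cw | 0x0020; }
#endif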
15616 /* Output code for INSN to convert a float to a signed int. OPERANDS
15617 are the insn operands. The output may be [HSD]Imode and the input
15618 operand may be [SDX]Fmode. */
15621 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15623 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15624 int dimode_p = GET_MODE (operands[0]) == DImode;
15625 int round_mode = get_attr_i387_cw (insn);
15627 /* Jump through a hoop or two for DImode, since the hardware has no
15628 non-popping instruction. We used to do this a different way, but
15629 that was somewhat fragile and broke with post-reload splitters. */
15630 if ((dimode_p || fisttp) && !stack_top_dies)
15631 output_asm_insn ("fld\t%y1", operands);
15633 gcc_assert (STACK_TOP_P (operands[1]));
15634 gcc_assert (MEM_P (operands[0]));
15635 gcc_assert (GET_MODE (operands[1]) != TFmode);
15638 output_asm_insn ("fisttp%Z0\t%0", operands);
15641 if (round_mode != I387_CW_ANY)
15642 output_asm_insn ("fldcw\t%3", operands);
15643 if (stack_top_dies || dimode_p)
15644 output_asm_insn ("fistp%Z0\t%0", operands);
15646 output_asm_insn ("fist%Z0\t%0", operands);
15647 if (round_mode != I387_CW_ANY)
15648 output_asm_insn ("fldcw\t%2", operands);
15654 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15655 have the values zero or one, indicates the ffreep insn's operand
15656 from the OPERANDS array. */
15658 static const char *
15659 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15661 if (TARGET_USE_FFREEP)
15662 #ifdef HAVE_AS_IX86_FFREEP
15663 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15666 static char retval[32];
15667 int regno = REGNO (operands[opno]);
15669 gcc_assert (STACK_REGNO_P (regno));
15671 regno -= FIRST_STACK_REG;
15673 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15678 return opno ? "fstp\t%y1" : "fstp\t%y0";
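/* Illustrative sketch (not from the original source): ffreep %st(i) is
   the two-byte opcode DF C0+i, so emitted as a little-endian .short it
   reads 0xC(i)DF -- exactly the string the snprintf above builds.  */
#if 0
#include <stdio.h>

static void
ffreep_encoding_sketch (int i) /* i = 387 stack register number, 0..7 */
{
  unsigned short word = 0xc0df + (i << 8); /* bytes DF, C0+i */
  printf (".short 0x%04x\t/* ffreep %%st(%d) */\n", word, i);
}
#endif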
15682 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15683 should be used. UNORDERED_P is true when fucom should be used. */
15686 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15688 int stack_top_dies;
15689 rtx cmp_op0, cmp_op1;
15690 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15694 cmp_op0 = operands[0];
15695 cmp_op1 = operands[1];
15699 cmp_op0 = operands[1];
15700 cmp_op1 = operands[2];
15705 if (GET_MODE (operands[0]) == SFmode)
15707 return "%vucomiss\t{%1, %0|%0, %1}";
15709 return "%vcomiss\t{%1, %0|%0, %1}";
15712 return "%vucomisd\t{%1, %0|%0, %1}";
15714 return "%vcomisd\t{%1, %0|%0, %1}";
15717 gcc_assert (STACK_TOP_P (cmp_op0));
15719 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15721 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15723 if (stack_top_dies)
15725 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15726 return output_387_ffreep (operands, 1);
15729 return "ftst\n\tfnstsw\t%0";
15732 if (STACK_REG_P (cmp_op1)
15734 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15735 && REGNO (cmp_op1) != FIRST_STACK_REG)
15737 /* If the top of the 387 stack dies, and the other operand
15738 is also a stack register that dies, then this must be a
15739 `fcompp' float compare. */
15743 /* There is no double popping fcomi variant. Fortunately,
15744 eflags is immune from the fstp's cc clobbering. */
15746 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15748 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15749 return output_387_ffreep (operands, 0);
15754 return "fucompp\n\tfnstsw\t%0";
15756 return "fcompp\n\tfnstsw\t%0";
15761 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15763 static const char * const alt[16] =
15765 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15766 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15767 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15768 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15770 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15771 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15775 "fcomi\t{%y1, %0|%0, %y1}",
15776 "fcomip\t{%y1, %0|%0, %y1}",
15777 "fucomi\t{%y1, %0|%0, %y1}",
15778 "fucomip\t{%y1, %0|%0, %y1}",
15789 mask = eflags_p << 3;
15790 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15791 mask |= unordered_p << 1;
15792 mask |= stack_top_dies;
15794 gcc_assert (mask < 16);
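/* Illustrative sketch (not from the original source): the table above is
   indexed by the 4-bit key built from the insn properties.  A standalone
   analogue of the encoding:  */
#if 0
static unsigned int
fp_compare_key_sketch (int eflags_p, int int_operand_p,
                       int unordered_p, int stack_top_dies)
{
  return (eflags_p << 3) | (int_operand_p << 2)
         | (unordered_p << 1) | stack_top_dies;
}

/* E.g. fucomip (eflags, fp operand, unordered, top dies) has key 11.  */
#endif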
15803 ix86_output_addr_vec_elt (FILE *file, int value)
15805 const char *directive = ASM_LONG;
15809 directive = ASM_QUAD;
15811 gcc_assert (!TARGET_64BIT);
15814 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15818 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15820 const char *directive = ASM_LONG;
15823 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15824 directive = ASM_QUAD;
15826 gcc_assert (!TARGET_64BIT);
15828 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15829 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15830 fprintf (file, "%s%s%d-%s%d\n",
15831 directive, LPREFIX, value, LPREFIX, rel);
15832 else if (HAVE_AS_GOTOFF_IN_DATA)
15833 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15835 else if (TARGET_MACHO)
15837 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15838 machopic_output_function_base_name (file);
15843 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15844 GOT_SYMBOL_NAME, LPREFIX, value);
15847 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target. */
15851 ix86_expand_clear (rtx dest)
15855 /* We play register width games, which are only valid after reload. */
15856 gcc_assert (reload_completed);
15858 /* Avoid HImode and its attendant prefix byte. */
15859 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15860 dest = gen_rtx_REG (SImode, REGNO (dest));
15861 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15863 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15864 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15866 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15867 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15873 /* X is an unchanging MEM. If it is a constant pool reference, return
15874 the constant pool rtx, else NULL. */
15877 maybe_get_pool_constant (rtx x)
15879 x = ix86_delegitimize_address (XEXP (x, 0));
15881 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15882 return get_pool_constant (x);
15888 ix86_expand_move (enum machine_mode mode, rtx operands[])
15891 enum tls_model model;
15896 if (GET_CODE (op1) == SYMBOL_REF)
15898 model = SYMBOL_REF_TLS_MODEL (op1);
15901 op1 = legitimize_tls_address (op1, model, true);
15902 op1 = force_operand (op1, op0);
15905 if (GET_MODE (op1) != mode)
15906 op1 = convert_to_mode (mode, op1, 1);
15908 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15909 && SYMBOL_REF_DLLIMPORT_P (op1))
15910 op1 = legitimize_dllimport_symbol (op1, false);
15912 else if (GET_CODE (op1) == CONST
15913 && GET_CODE (XEXP (op1, 0)) == PLUS
15914 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15916 rtx addend = XEXP (XEXP (op1, 0), 1);
15917 rtx symbol = XEXP (XEXP (op1, 0), 0);
15920 model = SYMBOL_REF_TLS_MODEL (symbol);
15922 tmp = legitimize_tls_address (symbol, model, true);
15923 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15924 && SYMBOL_REF_DLLIMPORT_P (symbol))
15925 tmp = legitimize_dllimport_symbol (symbol, true);
15929 tmp = force_operand (tmp, NULL);
15930 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15931 op0, 1, OPTAB_DIRECT);
15934 if (GET_MODE (tmp) != mode)
15935 op1 = convert_to_mode (mode, tmp, 1);
15939 if ((flag_pic || MACHOPIC_INDIRECT)
15940 && symbolic_operand (op1, mode))
15942 if (TARGET_MACHO && !TARGET_64BIT)
15945 /* dynamic-no-pic */
15946 if (MACHOPIC_INDIRECT)
15948 rtx temp = ((reload_in_progress
15949 || ((op0 && REG_P (op0))
15951 ? op0 : gen_reg_rtx (Pmode));
15952 op1 = machopic_indirect_data_reference (op1, temp);
15954 op1 = machopic_legitimize_pic_address (op1, mode,
15955 temp == op1 ? 0 : temp);
15957 if (op0 != op1 && GET_CODE (op0) != MEM)
15959 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15963 if (GET_CODE (op0) == MEM)
15964 op1 = force_reg (Pmode, op1);
15968 if (GET_CODE (temp) != REG)
15969 temp = gen_reg_rtx (Pmode);
15970 temp = legitimize_pic_address (op1, temp);
15975 /* dynamic-no-pic */
15981 op1 = force_reg (mode, op1);
15982 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15984 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15985 op1 = legitimize_pic_address (op1, reg);
15988 if (GET_MODE (op1) != mode)
15989 op1 = convert_to_mode (mode, op1, 1);
15996 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15997 || !push_operand (op0, mode))
15999 op1 = force_reg (mode, op1);
16001 if (push_operand (op0, mode)
16002 && ! general_no_elim_operand (op1, mode))
16003 op1 = copy_to_mode_reg (mode, op1);
16005 /* Force large constants in 64bit compilation into a register
16006 to get them CSEed. */
16007 if (can_create_pseudo_p ()
16008 && (mode == DImode) && TARGET_64BIT
16009 && immediate_operand (op1, mode)
16010 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16011 && !register_operand (op0, mode)
16013 op1 = copy_to_mode_reg (mode, op1);
16015 if (can_create_pseudo_p ()
16016 && FLOAT_MODE_P (mode)
16017 && GET_CODE (op1) == CONST_DOUBLE)
16019 /* If we are loading a floating point constant to a register,
16020 force the value to memory now, since we'll get better code
16021 out of the back end. */
16023 op1 = validize_mem (force_const_mem (mode, op1));
16024 if (!register_operand (op0, mode))
16026 rtx temp = gen_reg_rtx (mode);
16027 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16028 emit_move_insn (op0, temp);
16034 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16038 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16040 rtx op0 = operands[0], op1 = operands[1];
16041 unsigned int align = GET_MODE_ALIGNMENT (mode);
16043 /* Force constants other than zero into memory. We do not know how
16044 the instructions used to build constants modify the upper 64 bits
16045 of the register; once we have that information we may be able
16046 to handle some of them more efficiently. */
16047 if (can_create_pseudo_p ()
16048 && register_operand (op0, mode)
16049 && (CONSTANT_P (op1)
16050 || (GET_CODE (op1) == SUBREG
16051 && CONSTANT_P (SUBREG_REG (op1))))
16052 && !standard_sse_constant_p (op1))
16053 op1 = validize_mem (force_const_mem (mode, op1));
16055 /* We need to check memory alignment for SSE modes since attributes
16056 can make operands unaligned. */
16057 if (can_create_pseudo_p ()
16058 && SSE_REG_MODE_P (mode)
16059 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16060 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16064 /* ix86_expand_vector_move_misalign() does not like constants ... */
16065 if (CONSTANT_P (op1)
16066 || (GET_CODE (op1) == SUBREG
16067 && CONSTANT_P (SUBREG_REG (op1))))
16068 op1 = validize_mem (force_const_mem (mode, op1));
16070 /* ... nor both arguments in memory. */
16071 if (!register_operand (op0, mode)
16072 && !register_operand (op1, mode))
16073 op1 = force_reg (mode, op1);
16075 tmp[0] = op0; tmp[1] = op1;
16076 ix86_expand_vector_move_misalign (mode, tmp);
16080 /* Make operand1 a register if it isn't already. */
16081 if (can_create_pseudo_p ()
16082 && !register_operand (op0, mode)
16083 && !register_operand (op1, mode))
16085 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16089 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16092 /* Split 32-byte AVX unaligned load and store if needed. */
16095 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16098 rtx (*extract) (rtx, rtx, rtx);
16099 rtx (*load_unaligned) (rtx, rtx);
16100 rtx (*store_unaligned) (rtx, rtx);
16101 enum machine_mode mode;
16103 switch (GET_MODE (op0))
16106 gcc_unreachable ();
16108 extract = gen_avx_vextractf128v32qi;
16109 load_unaligned = gen_avx_loaddqu256;
16110 store_unaligned = gen_avx_storedqu256;
16114 extract = gen_avx_vextractf128v8sf;
16115 load_unaligned = gen_avx_loadups256;
16116 store_unaligned = gen_avx_storeups256;
16120 extract = gen_avx_vextractf128v4df;
16121 load_unaligned = gen_avx_loadupd256;
16122 store_unaligned = gen_avx_storeupd256;
16129 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16131 rtx r = gen_reg_rtx (mode);
16132 m = adjust_address (op1, mode, 0);
16133 emit_move_insn (r, m);
16134 m = adjust_address (op1, mode, 16);
16135 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16136 emit_move_insn (op0, r);
16139 emit_insn (load_unaligned (op0, op1));
16141 else if (MEM_P (op0))
16143 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16145 m = adjust_address (op0, mode, 0);
16146 emit_insn (extract (m, op1, const0_rtx));
16147 m = adjust_address (op0, mode, 16);
16148 emit_insn (extract (m, op1, const1_rtx));
16151 emit_insn (store_unaligned (op0, op1));
16154 gcc_unreachable ();
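/* Illustrative sketch (not from the original source): with the split
   enabled, one 32-byte unaligned access becomes two independent 16-byte
   halves at offsets 0 and 16.  A standalone C analogue of the load side
   (the store side is the mirror image using vextractf128):  */
#if 0
#include <string.h>

static void
split_load_sketch (unsigned char dst[32], const unsigned char *src)
{
  memcpy (dst, src, 16);           /* load the low xmm half  */
  memcpy (dst + 16, src + 16, 16); /* load the high xmm half */
}
#endif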
16157 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16158 straight to ix86_expand_vector_move. */
16159 /* Code generation for scalar reg-reg moves of single and double precision data:
16160 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16164 if (x86_sse_partial_reg_dependency == true)
16169 Code generation for scalar loads of double precision data:
16170 if (x86_sse_split_regs == true)
16171 movlpd mem, reg (gas syntax)
16175 Code generation for unaligned packed loads of single precision data
16176 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16177 if (x86_sse_unaligned_move_optimal)
16180 if (x86_sse_partial_reg_dependency == true)
16192 Code generation for unaligned packed loads of double precision data
16193 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16194 if (x86_sse_unaligned_move_optimal)
16197 if (x86_sse_split_regs == true)
16210 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
16218 && GET_MODE_SIZE (mode) == 32)
16220 switch (GET_MODE_CLASS (mode))
16222 case MODE_VECTOR_INT:
16224 op0 = gen_lowpart (V32QImode, op0);
16225 op1 = gen_lowpart (V32QImode, op1);
16228 case MODE_VECTOR_FLOAT:
16229 ix86_avx256_split_vector_move_misalign (op0, op1);
16233 gcc_unreachable ();
16241 /* ??? If we have typed data, then it would appear that using
16242 movdqu is the only way to get unaligned data loaded with integer type. */
16244 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16246 op0 = gen_lowpart (V16QImode, op0);
16247 op1 = gen_lowpart (V16QImode, op1);
16248 /* We will eventually emit movups based on insn attributes. */
16249 emit_insn (gen_sse2_loaddqu (op0, op1));
16251 else if (TARGET_SSE2 && mode == V2DFmode)
16256 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16257 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16258 || optimize_function_for_size_p (cfun))
16260 /* We will eventually emit movups based on insn attributes. */
16261 emit_insn (gen_sse2_loadupd (op0, op1));
16265 /* When SSE registers are split into halves, we can avoid
16266 writing to the top half twice. */
16267 if (TARGET_SSE_SPLIT_REGS)
16269 emit_clobber (op0);
16274 /* ??? Not sure about the best option for the Intel chips.
16275 The following would seem to satisfy; the register is
16276 entirely cleared, breaking the dependency chain. We
16277 then store to the upper half, with a dependency depth
16278 of one. A rumor has it that Intel recommends two movsd
16279 followed by an unpacklpd, but this is unconfirmed. And
16280 given that the dependency depth of the unpacklpd would
16281 still be one, I'm not sure why this would be better. */
16282 zero = CONST0_RTX (V2DFmode);
16285 m = adjust_address (op1, DFmode, 0);
16286 emit_insn (gen_sse2_loadlpd (op0, zero, m));
16287 m = adjust_address (op1, DFmode, 8);
16288 emit_insn (gen_sse2_loadhpd (op0, op0, m));
16293 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16294 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16295 || optimize_function_for_size_p (cfun))
16297 op0 = gen_lowpart (V4SFmode, op0);
16298 op1 = gen_lowpart (V4SFmode, op1);
16299 emit_insn (gen_sse_loadups (op0, op1));
16303 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
16304 emit_move_insn (op0, CONST0_RTX (mode));
16306 emit_clobber (op0);
16308 if (mode != V4SFmode)
16309 op0 = gen_lowpart (V4SFmode, op0);
16311 m = adjust_address (op1, V2SFmode, 0);
16312 emit_insn (gen_sse_loadlps (op0, op0, m));
16313 m = adjust_address (op1, V2SFmode, 8);
16314 emit_insn (gen_sse_loadhps (op0, op0, m));
16317 else if (MEM_P (op0))
16319 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16321 op0 = gen_lowpart (V16QImode, op0);
16322 op1 = gen_lowpart (V16QImode, op1);
16323 /* We will eventually emit movups based on insn attributes. */
16324 emit_insn (gen_sse2_storedqu (op0, op1));
16326 else if (TARGET_SSE2 && mode == V2DFmode)
16329 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16330 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16331 || optimize_function_for_size_p (cfun))
16332 /* We will eventually emit movups based on insn attributes. */
16333 emit_insn (gen_sse2_storeupd (op0, op1));
16336 m = adjust_address (op0, DFmode, 0);
16337 emit_insn (gen_sse2_storelpd (m, op1));
16338 m = adjust_address (op0, DFmode, 8);
16339 emit_insn (gen_sse2_storehpd (m, op1));
16344 if (mode != V4SFmode)
16345 op1 = gen_lowpart (V4SFmode, op1);
16348 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16349 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16350 || optimize_function_for_size_p (cfun))
16352 op0 = gen_lowpart (V4SFmode, op0);
16353 emit_insn (gen_sse_storeups (op0, op1));
16357 m = adjust_address (op0, V2SFmode, 0);
16358 emit_insn (gen_sse_storelps (m, op1));
16359 m = adjust_address (op0, V2SFmode, 8);
16360 emit_insn (gen_sse_storehps (m, op1));
16365 gcc_unreachable ();
16368 /* Expand a push in MODE. This is some mode for which we do not support
16369 proper push instructions, at least from the registers that we expect
16370 the value to live in. */
16373 ix86_expand_push (enum machine_mode mode, rtx x)
16377 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16378 GEN_INT (-GET_MODE_SIZE (mode)),
16379 stack_pointer_rtx, 1, OPTAB_DIRECT);
16380 if (tmp != stack_pointer_rtx)
16381 emit_move_insn (stack_pointer_rtx, tmp);
16383 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16385 /* When we push an operand onto the stack, it has to be aligned at least
16386 at the function argument boundary. However since we don't have
16387 the argument type, we can't determine the actual argument boundary. */
16389 emit_move_insn (tmp, x);
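/* Illustrative sketch (not from the original source): the expansion
   above is the moral equivalent of adjusting the stack pointer and
   storing through it.  A hypothetical C analogue for a downward-growing
   stack:  */
#if 0
#include <string.h>

static unsigned char *stack_ptr_sketch; /* stands in for the SP reg */

static void
push_sketch (const void *x, unsigned long size)
{
  stack_ptr_sketch -= size;           /* sub $size, %sp */
  memcpy (stack_ptr_sketch, x, size); /* mov x, (%sp)   */
}
#endif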
16392 /* Helper function of ix86_fixup_binary_operands to canonicalize
16393 operand order. Returns true if the operands should be swapped. */
16396 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16399 rtx dst = operands[0];
16400 rtx src1 = operands[1];
16401 rtx src2 = operands[2];
16403 /* If the operation is not commutative, we can't do anything. */
16404 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16407 /* Highest priority is that src1 should match dst. */
16408 if (rtx_equal_p (dst, src1))
16410 if (rtx_equal_p (dst, src2))
16413 /* Next highest priority is that immediate constants come second. */
16414 if (immediate_operand (src2, mode))
16416 if (immediate_operand (src1, mode))
16419 /* Lowest priority is that memory references should come second. */
16429 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16430 destination to use for the operation. If different from the true
16431 destination in operands[0], a copy operation will be required. */
16434 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16437 rtx dst = operands[0];
16438 rtx src1 = operands[1];
16439 rtx src2 = operands[2];
16441 /* Canonicalize operand order. */
16442 if (ix86_swap_binary_operands_p (code, mode, operands))
16446 /* It is invalid to swap operands of different modes. */
16447 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16454 /* Both source operands cannot be in memory. */
16455 if (MEM_P (src1) && MEM_P (src2))
16457 /* Optimization: Only read from memory once. */
16458 if (rtx_equal_p (src1, src2))
16460 src2 = force_reg (mode, src2);
16464 src2 = force_reg (mode, src2);
16467 /* If the destination is memory, and we do not have matching source
16468 operands, do things in registers. */
16469 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16470 dst = gen_reg_rtx (mode);
16472 /* Source 1 cannot be a constant. */
16473 if (CONSTANT_P (src1))
16474 src1 = force_reg (mode, src1);
16476 /* Source 1 cannot be a non-matching memory. */
16477 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16478 src1 = force_reg (mode, src1);
16480 /* Improve address combine. */
16482 && GET_MODE_CLASS (mode) == MODE_INT
16484 src2 = force_reg (mode, src2);
16486 operands[1] = src1;
16487 operands[2] = src2;
16491 /* Similarly, but assume that the destination has already been
16492 set up properly. */
16495 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16496 enum machine_mode mode, rtx operands[])
16498 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16499 gcc_assert (dst == operands[0]);
16502 /* Attempt to expand a binary operator. Make the expansion closer to the
16503 actual machine, than just general_operand, which will allow 3 separate
16504 memory references (one output, two input) in a single insn. */
16507 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16510 rtx src1, src2, dst, op, clob;
16512 dst = ix86_fixup_binary_operands (code, mode, operands);
16513 src1 = operands[1];
16514 src2 = operands[2];
16516 /* Emit the instruction. */
16518 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16519 if (reload_in_progress)
16521 /* Reload doesn't know about the flags register, and doesn't know that
16522 it doesn't want to clobber it. We can only do this with PLUS. */
16523 gcc_assert (code == PLUS);
16526 else if (reload_completed
16528 && !rtx_equal_p (dst, src1))
16530 /* This is going to be an LEA; avoid splitting it later. */
16535 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16536 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16539 /* Fix up the destination if needed. */
16540 if (dst != operands[0])
16541 emit_move_insn (operands[0], dst);
16544 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16545 the given OPERANDS. */
16548 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
16551 rtx op1 = NULL_RTX, op2 = NULL_RTX;
16552 if (GET_CODE (operands[1]) == SUBREG)
16557 else if (GET_CODE (operands[2]) == SUBREG)
16562 /* Optimize (__m128i) d | (__m128i) e and similar code
16563 when d and e are float vectors into a float vector logical
16564 insn. In C/C++ without using intrinsics there is no other way
16565 to express a vector logical operation on float vectors than
16566 to cast them temporarily to integer vectors. */
16568 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16569 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
16570 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
16571 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
16572 && SUBREG_BYTE (op1) == 0
16573 && (GET_CODE (op2) == CONST_VECTOR
16574 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
16575 && SUBREG_BYTE (op2) == 0))
16576 && can_create_pseudo_p ())
16579 switch (GET_MODE (SUBREG_REG (op1)))
16585 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
16586 if (GET_CODE (op2) == CONST_VECTOR)
16588 op2 = gen_lowpart (GET_MODE (dst), op2);
16589 op2 = force_reg (GET_MODE (dst), op2);
16594 op2 = SUBREG_REG (operands[2]);
16595 if (!nonimmediate_operand (op2, GET_MODE (dst)))
16596 op2 = force_reg (GET_MODE (dst), op2);
16598 op1 = SUBREG_REG (op1);
16599 if (!nonimmediate_operand (op1, GET_MODE (dst)))
16600 op1 = force_reg (GET_MODE (dst), op1);
16601 emit_insn (gen_rtx_SET (VOIDmode, dst,
16602 gen_rtx_fmt_ee (code, GET_MODE (dst),
16604 emit_move_insn (operands[0], gen_lowpart (mode, dst));
16610 if (!nonimmediate_operand (operands[1], mode))
16611 operands[1] = force_reg (mode, operands[1]);
16612 if (!nonimmediate_operand (operands[2], mode))
16613 operands[2] = force_reg (mode, operands[2]);
16614 ix86_fixup_binary_operands_no_copy (code, mode, operands);
16615 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16616 gen_rtx_fmt_ee (code, mode, operands[1],
16620 /* Return TRUE or FALSE depending on whether the binary operator meets the
16621 appropriate constraints. */
16624 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16627 rtx dst = operands[0];
16628 rtx src1 = operands[1];
16629 rtx src2 = operands[2];
16631 /* Both source operands cannot be in memory. */
16632 if (MEM_P (src1) && MEM_P (src2))
16635 /* Canonicalize operand order for commutative operators. */
16636 if (ix86_swap_binary_operands_p (code, mode, operands))
16643 /* If the destination is memory, we must have a matching source operand. */
16644 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16647 /* Source 1 cannot be a constant. */
16648 if (CONSTANT_P (src1))
16651 /* Source 1 cannot be a non-matching memory. */
16652 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16653 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16654 return (code == AND
16657 || (TARGET_64BIT && mode == DImode))
16658 && satisfies_constraint_L (src2));
16663 /* Attempt to expand a unary operator. Make the expansion closer to the
16664 actual machine, than just general_operand, which will allow 2 separate
16665 memory references (one output, one input) in a single insn. */
16668 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16671 int matching_memory;
16672 rtx src, dst, op, clob;
16677 /* If the destination is memory, and we do not have matching source
16678 operands, do things in registers. */
16679 matching_memory = 0;
16682 if (rtx_equal_p (dst, src))
16683 matching_memory = 1;
16685 dst = gen_reg_rtx (mode);
16688 /* When source operand is memory, destination must match. */
16689 if (MEM_P (src) && !matching_memory)
16690 src = force_reg (mode, src);
16692 /* Emit the instruction. */
16694 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16695 if (reload_in_progress || code == NOT)
16697 /* Reload doesn't know about the flags register, and doesn't know that
16698 it doesn't want to clobber it. */
16699 gcc_assert (code == NOT);
16704 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16705 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16708 /* Fix up the destination if needed. */
16709 if (dst != operands[0])
16710 emit_move_insn (operands[0], dst);
16713 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16714 divisor are within the range [0-255]. */
16717 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16720 rtx end_label, qimode_label;
16721 rtx insn, div, mod;
16722 rtx scratch, tmp0, tmp1, tmp2;
16723 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16724 rtx (*gen_zero_extend) (rtx, rtx);
16725 rtx (*gen_test_ccno_1) (rtx, rtx);
16730 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16731 gen_test_ccno_1 = gen_testsi_ccno_1;
16732 gen_zero_extend = gen_zero_extendqisi2;
16735 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16736 gen_test_ccno_1 = gen_testdi_ccno_1;
16737 gen_zero_extend = gen_zero_extendqidi2;
16740 gcc_unreachable ();
16743 end_label = gen_label_rtx ();
16744 qimode_label = gen_label_rtx ();
16746 scratch = gen_reg_rtx (mode);
16748 /* Use 8bit unsigned divmod if dividend and divisor are within
16749 the range [0-255]. */
16750 emit_move_insn (scratch, operands[2]);
16751 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16752 scratch, 1, OPTAB_DIRECT);
16753 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16754 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16755 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16756 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16757 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16759 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16760 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16761 JUMP_LABEL (insn) = qimode_label;
16763 /* Generate original signed/unsigned divmod. */
16764 div = gen_divmod4_1 (operands[0], operands[1],
16765 operands[2], operands[3]);
16768 /* Branch to the end. */
16769 emit_jump_insn (gen_jump (end_label));
16772 /* Generate 8bit unsigned divide. */
16773 emit_label (qimode_label);
16774 /* Don't use operands[0] for result of 8bit divide since not all
16775 registers support QImode ZERO_EXTRACT. */
16776 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16777 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16778 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16779 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16783 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16784 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16788 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16789 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16792 /* Extract remainder from AH. */
16793 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16794 if (REG_P (operands[1]))
16795 insn = emit_move_insn (operands[1], tmp1);
16798 /* Need a new scratch register since the old one has the result of the 8bit divide. */
16800 scratch = gen_reg_rtx (mode);
16801 emit_move_insn (scratch, tmp1);
16802 insn = emit_move_insn (operands[1], scratch);
16804 set_unique_reg_note (insn, REG_EQUAL, mod);
16806 /* Zero extend quotient from AL. */
16807 tmp1 = gen_lowpart (QImode, tmp0);
16808 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16809 set_unique_reg_note (insn, REG_EQUAL, div);
16811 emit_label (end_label);
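/* Illustrative sketch (not from the original source): the range test
   emitted above ORs dividend and divisor and tests against -0x100, which
   is zero exactly when both values fit in [0, 255].  A standalone C
   analogue of the dispatch condition:  */
#if 0
#include <stdint.h>

static int
both_fit_in_byte_sketch (uint32_t dividend, uint32_t divisor)
{
  /* test $-0x100, (dividend | divisor)  */
  return ((dividend | divisor) & ~0xffu) == 0;
}
#endif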
16814 #define LEA_MAX_STALL (3)
16815 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16817 /* Increase given DISTANCE in half-cycles according to
16818 dependencies between PREV and NEXT instructions.
16819 Add 1 half-cycle if there is no dependency and
16820 go to the next cycle if there is some dependency. */
16822 static unsigned int
16823 increase_distance (rtx prev, rtx next, unsigned int distance)
16828 if (!prev || !next)
16829 return distance + (distance & 1) + 2;
16831 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16832 return distance + 1;
16834 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16835 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16836 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16837 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16838 return distance + (distance & 1) + 2;
16840 return distance + 1;
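/* Illustrative sketch (not from the original source): distances are in
   half-cycles, so "distance + (distance & 1) + 2" rounds DISTANCE up to
   the start of the next cycle and then adds one full cycle, while the
   independent case costs one half-cycle.  E.g. both 3 and 4 become 6 in
   the dependent case.  */
#if 0
static unsigned int
bump_dependent_sketch (unsigned int d)   { return d + (d & 1) + 2; }
static unsigned int
bump_independent_sketch (unsigned int d) { return d + 1; }
#endif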
16843 /* Check whether instruction INSN defines register number
16844 REGNO1 or REGNO2. */
16847 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16852 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16853 if (DF_REF_REG_DEF_P (*def_rec)
16854 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16855 && (regno1 == DF_REF_REGNO (*def_rec)
16856 || regno2 == DF_REF_REGNO (*def_rec)))
16864 /* Check whether instruction INSN uses register number
16865 REGNO as part of an address expression. */
16868 insn_uses_reg_mem (unsigned int regno, rtx insn)
16872 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16873 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16879 /* Search backward for non-agu definition of register number REGNO1
16880 or register number REGNO2 in basic block starting from instruction
16881 START up to head of basic block or instruction INSN.
16883 The function sets *FOUND to true if a definition was found
16884 and to false otherwise.
16886 Distance in half-cycles between START and found instruction or head
16887 of BB is added to DISTANCE and returned. */
16890 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16891 rtx insn, int distance,
16892 rtx start, bool *found)
16894 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16902 && distance < LEA_SEARCH_THRESHOLD)
16904 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16906 distance = increase_distance (prev, next, distance);
16907 if (insn_defines_reg (regno1, regno2, prev))
16909 if (recog_memoized (prev) < 0
16910 || get_attr_type (prev) != TYPE_LEA)
16919 if (prev == BB_HEAD (bb))
16922 prev = PREV_INSN (prev);
16928 /* Search backward for non-agu definition of register number REGNO1
16929 or register number REGNO2 in INSN's basic block until
16930 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16931 2. Reach a neighbouring BB's boundary, or
16932 3. Reach agu definition.
16933 Returns the distance between the non-agu definition point and INSN.
16934 If no definition point, returns -1. */
16937 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16940 basic_block bb = BLOCK_FOR_INSN (insn);
16942 bool found = false;
16944 if (insn != BB_HEAD (bb))
16945 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16946 distance, PREV_INSN (insn),
16949 if (!found && distance < LEA_SEARCH_THRESHOLD)
16953 bool simple_loop = false;
16955 FOR_EACH_EDGE (e, ei, bb->preds)
16958 simple_loop = true;
16963 distance = distance_non_agu_define_in_bb (regno1, regno2,
16965 BB_END (bb), &found);
16968 int shortest_dist = -1;
16969 bool found_in_bb = false;
16971 FOR_EACH_EDGE (e, ei, bb->preds)
16974 = distance_non_agu_define_in_bb (regno1, regno2,
16980 if (shortest_dist < 0)
16981 shortest_dist = bb_dist;
16982 else if (bb_dist > 0)
16983 shortest_dist = MIN (bb_dist, shortest_dist);
16989 distance = shortest_dist;
16993 /* get_attr_type may modify recog data. We want to make sure
16994 that recog data is valid for instruction INSN, on which
16995 distance_non_agu_define is called. INSN is unchanged here. */
16996 extract_insn_cached (insn);
17001 return distance >> 1;
17004 /* Return the distance in half-cycles between INSN and the next
17005 insn that uses register number REGNO in a memory address, added
17006 to DISTANCE. Return -1 if REGNO is set.
17008 Set *FOUND to true if a register usage was found and false otherwise.
17010 Set *REDEFINED to true if a register redefinition was
17011 found and false otherwise. */
17014 distance_agu_use_in_bb (unsigned int regno,
17015 rtx insn, int distance, rtx start,
17016 bool *found, bool *redefined)
17018 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17023 *redefined = false;
17027 && distance < LEA_SEARCH_THRESHOLD)
17029 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17031 distance = increase_distance(prev, next, distance);
17032 if (insn_uses_reg_mem (regno, next))
17034 /* Return DISTANCE if OP0 is used in memory
17035 address in NEXT. */
17040 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17042 /* Return -1 if OP0 is set in NEXT. */
17050 if (next == BB_END (bb))
17053 next = NEXT_INSN (next);
17059 /* Return the distance between INSN and the next insn that uses
17060 register number REGNO0 in a memory address. Return -1 if no such
17061 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17064 distance_agu_use (unsigned int regno0, rtx insn)
17066 basic_block bb = BLOCK_FOR_INSN (insn);
17068 bool found = false;
17069 bool redefined = false;
17071 if (insn != BB_END (bb))
17072 distance = distance_agu_use_in_bb (regno0, insn, distance,
17074 &found, &redefined);
17076 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17080 bool simple_loop = false;
17082 FOR_EACH_EDGE (e, ei, bb->succs)
17085 simple_loop = true;
17090 distance = distance_agu_use_in_bb (regno0, insn,
17091 distance, BB_HEAD (bb),
17092 &found, &redefined);
17095 int shortest_dist = -1;
17096 bool found_in_bb = false;
17097 bool redefined_in_bb = false;
17099 FOR_EACH_EDGE (e, ei, bb->succs)
17102 = distance_agu_use_in_bb (regno0, insn,
17103 distance, BB_HEAD (e->dest),
17104 &found_in_bb, &redefined_in_bb);
17107 if (shortest_dist < 0)
17108 shortest_dist = bb_dist;
17109 else if (bb_dist > 0)
17110 shortest_dist = MIN (bb_dist, shortest_dist);
17116 distance = shortest_dist;
17120 if (!found || redefined)
17123 return distance >> 1;
17126 /* Define this macro to tune LEA priority vs ADD; it takes effect when
17127 there is a dilemma of choosing LEA or ADD.
17128 Negative value: ADD is preferred over LEA
17130 Positive value: LEA is preferred over ADD */
17131 #define IX86_LEA_PRIORITY 0
17133 /* Return true if using the lea INSN has a performance advantage
17134 over a sequence of instructions. The instruction sequence has
17135 SPLIT_COST cycles higher latency than the lea latency. */
17138 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
17139 unsigned int regno2, int split_cost)
17141 int dist_define, dist_use;
17143 dist_define = distance_non_agu_define (regno1, regno2, insn);
17144 dist_use = distance_agu_use (regno0, insn);
17146 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
17148 /* If there is no non-AGU operand definition, no AGU
17149 operand usage and the split cost is 0, then both the lea
17150 and non-lea variants have the same priority. Currently
17151 we prefer lea for 64 bit code and non-lea on 32 bit code. */
17153 if (dist_use < 0 && split_cost == 0)
17154 return TARGET_64BIT || IX86_LEA_PRIORITY;
17159 /* With a longer definition distance, lea is preferable.
17160 Here we change it to take into account the splitting cost and lea priority. */
17162 dist_define += split_cost + IX86_LEA_PRIORITY;
17164 /* If there is no use in a memory address then we just check
17165 that the split cost exceeds the AGU stall. */
17167 return dist_define > LEA_MAX_STALL;
17169 /* If this insn has both backward non-agu dependence and forward
17170 agu dependence, the one with the shorter distance takes effect. */
17171 return dist_define >= dist_use;
17174 /* Return true if it is legal to clobber flags by INSN and
17175 false otherwise. */
17178 ix86_ok_to_clobber_flags (rtx insn)
17180 basic_block bb = BLOCK_FOR_INSN (insn);
17186 if (NONDEBUG_INSN_P (insn))
17188 for (use = DF_INSN_USES (insn); *use; use++)
17189 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
17192 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
17196 if (insn == BB_END (bb))
17199 insn = NEXT_INSN (insn);
17202 live = df_get_live_out(bb);
17203 return !REGNO_REG_SET_P (live, FLAGS_REG);
17206 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17207 move and add to avoid AGU stalls. */
17210 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
17212 unsigned int regno0, regno1, regno2;
17214 /* Check if we need to optimize. */
17215 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17218 /* Check it is correct to split here. */
17219 if (!ix86_ok_to_clobber_flags(insn))
17222 regno0 = true_regnum (operands[0]);
17223 regno1 = true_regnum (operands[1]);
17224 regno2 = true_regnum (operands[2]);
17226 /* We need to split only adds with a non-destructive
17227 destination operand. */
17228 if (regno0 == regno1 || regno0 == regno2)
17231 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
17234 /* Return true if we should emit a lea instruction instead of a mov instruction. */
17238 ix86_use_lea_for_mov (rtx insn, rtx operands[])
17240 unsigned int regno0, regno1;
17242 /* Check if we need to optimize. */
17243 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17246 /* Use lea for reg to reg moves only. */
17247 if (!REG_P (operands[0]) || !REG_P (operands[1]))
17250 regno0 = true_regnum (operands[0]);
17251 regno1 = true_regnum (operands[1]);
17253 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
17256 /* Return true if we need to split lea into a sequence of
17257 instructions to avoid AGU stalls. */
17260 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
17262 unsigned int regno0, regno1, regno2;
17264 struct ix86_address parts;
17267 /* Check if we need to optimize. */
17268 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17271 /* Check it is correct to split here. */
17272 if (!ix86_ok_to_clobber_flags(insn))
17275 ok = ix86_decompose_address (operands[1], &parts);
17278 /* There should be at least two components in the address. */
17279 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
17280 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
17283 /* We should not split into adds if a non-legitimate pic
17284 operand is used as the displacement. */
17285 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
17288 regno0 = true_regnum (operands[0]) ;
17289 regno1 = INVALID_REGNUM;
17290 regno2 = INVALID_REGNUM;
17293 regno1 = true_regnum (parts.base);
17295 regno2 = true_regnum (parts.index);
/* Compute how many cycles we will add to execution time
if we split the lea into a sequence of instructions.  */
17301 if (parts.base || parts.index)
/* Have to use a mov instruction if the non-destructive
destination form is used.  */
17305 if (regno1 != regno0 && regno2 != regno0)
17308 /* Have to add index to base if both exist. */
17309 if (parts.base && parts.index)
17312 /* Have to use shift and adds if scale is 2 or greater. */
17313 if (parts.scale > 1)
17315 if (regno0 != regno1)
17317 else if (regno2 == regno0)
17320 split_cost += parts.scale;
/* Have to use an add instruction with an immediate if
disp is non-zero.  */
17325 if (parts.disp && parts.disp != const0_rtx)
17328 /* Subtract the price of lea. */
17332 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
/* Emit x86 binary operand CODE in mode MODE, where the first operand
matches the destination.  The RTX includes a clobber of FLAGS_REG.  */
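/* Schematically, for PLUS this emits

     (parallel [(set dst (plus:MODE dst src))
                (clobber (reg:CC FLAGS_REG))])  */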
17339 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
17344 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
17345 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17347 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17350 /* Return true if regno1 def is nearest to the insn. */
17353 find_nearest_reg_def (rtx insn, int regno1, int regno2)
17356 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
17360 while (prev && prev != start)
17362 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
17364 prev = PREV_INSN (prev);
17367 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
17369 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
17371 prev = PREV_INSN (prev);
17374 /* None of the regs is defined in the bb. */
17378 /* Split lea instructions into a sequence of instructions
17379 which are executed on ALU to avoid AGU stalls.
It is assumed that it is allowed to clobber the flags register
at the lea position.  */
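/* For example (illustrative only), the address computation

     lea    0x4(%ebx,%ecx,2), %eax

   may be split into

     mov    %ecx, %eax
     shl    $1, %eax
     add    %ebx, %eax
     add    $0x4, %eax

   where every step executes on the ALU.  */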
17384 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
17386 unsigned int regno0, regno1, regno2;
17387 struct ix86_address parts;
17391 ok = ix86_decompose_address (operands[1], &parts);
17394 target = gen_lowpart (mode, operands[0]);
17396 regno0 = true_regnum (target);
17397 regno1 = INVALID_REGNUM;
17398 regno2 = INVALID_REGNUM;
17402 parts.base = gen_lowpart (mode, parts.base);
17403 regno1 = true_regnum (parts.base);
17408 parts.index = gen_lowpart (mode, parts.index);
17409 regno2 = true_regnum (parts.index);
17413 parts.disp = gen_lowpart (mode, parts.disp);
17415 if (parts.scale > 1)
17417 /* Case r1 = r1 + ... */
17418 if (regno1 == regno0)
/* If we have a case like r1 = r1 + C * r1 then we
should use multiplication, which is very
expensive.  Assume the cost model is wrong if we
have such a case here.  */
17424 gcc_assert (regno2 != regno0);
17426 for (adds = parts.scale; adds > 0; adds--)
17427 ix86_emit_binop (PLUS, mode, target, parts.index);
17431 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17432 if (regno0 != regno2)
17433 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
17435 /* Use shift for scaling. */
17436 ix86_emit_binop (ASHIFT, mode, target,
17437 GEN_INT (exact_log2 (parts.scale)));
17440 ix86_emit_binop (PLUS, mode, target, parts.base);
17442 if (parts.disp && parts.disp != const0_rtx)
17443 ix86_emit_binop (PLUS, mode, target, parts.disp);
17446 else if (!parts.base && !parts.index)
gcc_assert (parts.disp);
17449 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
17455 if (regno0 != regno2)
17456 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
17458 else if (!parts.index)
17460 if (regno0 != regno1)
17461 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
17465 if (regno0 == regno1)
17467 else if (regno0 == regno2)
17473 /* Find better operand for SET instruction, depending
17474 on which definition is farther from the insn. */
17475 if (find_nearest_reg_def (insn, regno1, regno2))
17476 tmp = parts.index, tmp1 = parts.base;
17478 tmp = parts.base, tmp1 = parts.index;
17480 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17482 if (parts.disp && parts.disp != const0_rtx)
17483 ix86_emit_binop (PLUS, mode, target, parts.disp);
17485 ix86_emit_binop (PLUS, mode, target, tmp1);
17489 ix86_emit_binop (PLUS, mode, target, tmp);
17492 if (parts.disp && parts.disp != const0_rtx)
17493 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* Return true if it is ok to optimize an ADD operation to LEA
operation to avoid flag register consumption.  For most processors,
ADD is faster than LEA.  For processors like ATOM, if the
destination register of LEA holds an actual address which will be
used soon, LEA is better and otherwise ADD is better.  */
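/* E.g. on ATOM "lea (%edi,%esi), %edi" can beat "add %esi, %edi"
   when %edi is dereferenced shortly afterwards, since the lea result
   is produced in the address generation unit and needs no ALU-to-AGU
   forwarding.  */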
17504 ix86_lea_for_add_ok (rtx insn, rtx operands[])
17506 unsigned int regno0 = true_regnum (operands[0]);
17507 unsigned int regno1 = true_regnum (operands[1]);
17508 unsigned int regno2 = true_regnum (operands[2]);
17510 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17511 if (regno0 != regno1 && regno0 != regno2)
17514 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17517 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
17520 /* Return true if destination reg of SET_BODY is shift count of
17524 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
17530 /* Retrieve destination of SET_BODY. */
17531 switch (GET_CODE (set_body))
17534 set_dest = SET_DEST (set_body);
17535 if (!set_dest || !REG_P (set_dest))
17539 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
17540 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
17548 /* Retrieve shift count of USE_BODY. */
17549 switch (GET_CODE (use_body))
17552 shift_rtx = XEXP (use_body, 1);
17555 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
17556 if (ix86_dep_by_shift_count_body (set_body,
17557 XVECEXP (use_body, 0, i)))
17565 && (GET_CODE (shift_rtx) == ASHIFT
17566 || GET_CODE (shift_rtx) == LSHIFTRT
17567 || GET_CODE (shift_rtx) == ASHIFTRT
17568 || GET_CODE (shift_rtx) == ROTATE
17569 || GET_CODE (shift_rtx) == ROTATERT))
17571 rtx shift_count = XEXP (shift_rtx, 1);
17573 /* Return true if shift count is dest of SET_BODY. */
17574 if (REG_P (shift_count))
/* Add a check since this can be invoked before register
allocation by the pre-reload scheduler.  */
17578 if (reload_completed
17579 && true_regnum (set_dest) == true_regnum (shift_count))
else if (REGNO (set_dest) == REGNO (shift_count))
17589 /* Return true if destination reg of SET_INSN is shift count of
17593 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
17595 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
17596 PATTERN (use_insn));
17599 /* Return TRUE or FALSE depending on whether the unary operator meets the
17600 appropriate constraints. */
17603 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
17604 enum machine_mode mode ATTRIBUTE_UNUSED,
17605 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is memory, source and destination must match.  */
17608 if ((MEM_P (operands[0])
17609 || MEM_P (operands[1]))
17610 && ! rtx_equal_p (operands[0], operands[1]))
17615 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17616 are ok, keeping in mind the possible movddup alternative. */
17619 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
17621 if (MEM_P (operands[0]))
17622 return rtx_equal_p (operands[0], operands[1 + high]);
17623 if (MEM_P (operands[1]) && MEM_P (operands[2]))
17624 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
17628 /* Post-reload splitter for converting an SF or DFmode value in an
17629 SSE register into an unsigned SImode. */
17632 ix86_split_convert_uns_si_sse (rtx operands[])
17634 enum machine_mode vecmode;
17635 rtx value, large, zero_or_two31, input, two31, x;
17637 large = operands[1];
17638 zero_or_two31 = operands[2];
17639 input = operands[3];
17640 two31 = operands[4];
17641 vecmode = GET_MODE (large);
17642 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
17644 /* Load up the value into the low element. We must ensure that the other
17645 elements are valid floats -- zero is the easiest such value. */
17648 if (vecmode == V4SFmode)
17649 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
17651 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
17655 input = gen_rtx_REG (vecmode, REGNO (input));
17656 emit_move_insn (value, CONST0_RTX (vecmode));
17657 if (vecmode == V4SFmode)
17658 emit_insn (gen_sse_movss (value, value, input));
17660 emit_insn (gen_sse2_movsd (value, value, input));
17663 emit_move_insn (large, two31);
17664 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
17666 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
17667 emit_insn (gen_rtx_SET (VOIDmode, large, x));
17669 x = gen_rtx_AND (vecmode, zero_or_two31, large);
17670 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
17672 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17673 emit_insn (gen_rtx_SET (VOIDmode, value, x));
17675 large = gen_rtx_REG (V4SImode, REGNO (large));
17676 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17678 x = gen_rtx_REG (V4SImode, REGNO (value));
17679 if (vecmode == V4SFmode)
17680 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17682 emit_insn (gen_sse2_cvttpd2dq (x, value));
17685 emit_insn (gen_xorv4si3 (value, value, large));
17688 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17689 Expects the 64-bit DImode to be supplied in a pair of integral
17690 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17691 -mfpmath=sse, !optimize_size only. */
17694 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17696 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17697 rtx int_xmm, fp_xmm;
17698 rtx biases, exponents;
17701 int_xmm = gen_reg_rtx (V4SImode);
17702 if (TARGET_INTER_UNIT_MOVES)
17703 emit_insn (gen_movdi_to_sse (int_xmm, input));
17704 else if (TARGET_SSE_SPLIT_REGS)
17706 emit_clobber (int_xmm);
17707 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17711 x = gen_reg_rtx (V2DImode);
17712 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17713 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
17716 x = gen_rtx_CONST_VECTOR (V4SImode,
17717 gen_rtvec (4, GEN_INT (0x43300000UL),
17718 GEN_INT (0x45300000UL),
17719 const0_rtx, const0_rtx));
17720 exponents = validize_mem (force_const_mem (V4SImode, x));
17722 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17723 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
/* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
17726 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17727 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17728 (0x1.0p84 + double(fp_value_hi_xmm)).
17729 Note these exponents differ by 32. */
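/* Worked example: for the input 0x0000000500000007 the two doubles
   are 0x1.0p52 + 7.0 and 0x1.0p84 + 5.0 * 0x1.0p32; subtracting the
   biases below leaves 7.0 and 5.0 * 0x1.0p32, whose sum is the
   desired value 0x500000007 as a double.  */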
17731 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17733 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17734 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17735 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17736 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17737 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17738 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17739 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17740 biases = validize_mem (force_const_mem (V2DFmode, biases));
17741 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17743 /* Add the upper and lower DFmode values together. */
17745 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
17748 x = copy_to_mode_reg (V2DFmode, fp_xmm);
17749 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
17750 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
17753 ix86_expand_vector_extract (false, target, fp_xmm, 0);
17756 /* Not used, but eases macroization of patterns. */
17758 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
17759 rtx input ATTRIBUTE_UNUSED)
17761 gcc_unreachable ();
17764 /* Convert an unsigned SImode value into a DFmode. Only currently used
17765 for SSE, but applicable anywhere. */
17768 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
17770 REAL_VALUE_TYPE TWO31r;
17773 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
17774 NULL, 1, OPTAB_DIRECT);
17776 fp = gen_reg_rtx (DFmode);
17777 emit_insn (gen_floatsidf2 (fp, x));
17779 real_ldexp (&TWO31r, &dconst1, 31);
17780 x = const_double_from_real_value (TWO31r, DFmode);
17782 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
17784 emit_move_insn (target, x);
17787 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17788 32-bit mode; otherwise we have a direct convert instruction. */
17791 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
17793 REAL_VALUE_TYPE TWO32r;
17794 rtx fp_lo, fp_hi, x;
17796 fp_lo = gen_reg_rtx (DFmode);
17797 fp_hi = gen_reg_rtx (DFmode);
17799 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
17801 real_ldexp (&TWO32r, &dconst1, 32);
17802 x = const_double_from_real_value (TWO32r, DFmode);
17803 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
17805 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
17807 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
17810 emit_move_insn (target, x);
17813 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17814 For x86_32, -mfpmath=sse, !optimize_size only. */
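/* The input is split as val = hi * 0x10000 + lo with hi and lo in
   [0, 0xffff].  Both halves convert to SFmode exactly, the multiply
   by 0x1.0p16 stays exact, so the final add is the only rounding
   step.  */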
17816 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
17818 REAL_VALUE_TYPE ONE16r;
17819 rtx fp_hi, fp_lo, int_hi, int_lo, x;
17821 real_ldexp (&ONE16r, &dconst1, 16);
17822 x = const_double_from_real_value (ONE16r, SFmode);
int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
17824 NULL, 0, OPTAB_DIRECT);
int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
17826 NULL, 0, OPTAB_DIRECT);
17827 fp_hi = gen_reg_rtx (SFmode);
17828 fp_lo = gen_reg_rtx (SFmode);
17829 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
17830 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
17831 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
17833 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
17835 if (!rtx_equal_p (target, fp_hi))
17836 emit_move_insn (target, fp_hi);
17839 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17840 a vector of unsigned ints VAL to vector of floats TARGET. */
17843 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
17846 REAL_VALUE_TYPE TWO16r;
17847 enum machine_mode intmode = GET_MODE (val);
17848 enum machine_mode fltmode = GET_MODE (target);
17849 rtx (*cvt) (rtx, rtx);
17851 if (intmode == V4SImode)
17852 cvt = gen_floatv4siv4sf2;
17854 cvt = gen_floatv8siv8sf2;
17855 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
17856 tmp[0] = force_reg (intmode, tmp[0]);
17857 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
17859 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
17860 NULL_RTX, 1, OPTAB_DIRECT);
17861 tmp[3] = gen_reg_rtx (fltmode);
17862 emit_insn (cvt (tmp[3], tmp[1]));
17863 tmp[4] = gen_reg_rtx (fltmode);
17864 emit_insn (cvt (tmp[4], tmp[2]));
17865 real_ldexp (&TWO16r, &dconst1, 16);
17866 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
17867 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
17868 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
17870 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
17872 if (tmp[7] != target)
17873 emit_move_insn (target, tmp[7]);
17876 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17877 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17878 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17879 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
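/* Worked scalar example: 3000000000 is >= 0x1.0p31, so it is
   converted as (int) (3000000000.0 - 0x1.0p31) = 852516352, and
   852516352 ^ 0x80000000 = 3000000000 recovers the unsigned
   result.  */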
17882 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
17884 REAL_VALUE_TYPE TWO31r;
17885 rtx two31r, tmp[4];
17886 enum machine_mode mode = GET_MODE (val);
17887 enum machine_mode scalarmode = GET_MODE_INNER (mode);
17888 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
17889 rtx (*cmp) (rtx, rtx, rtx, rtx);
17892 for (i = 0; i < 3; i++)
17893 tmp[i] = gen_reg_rtx (mode);
17894 real_ldexp (&TWO31r, &dconst1, 31);
17895 two31r = const_double_from_real_value (TWO31r, scalarmode);
17896 two31r = ix86_build_const_vector (mode, 1, two31r);
17897 two31r = force_reg (mode, two31r);
17900 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
17901 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
17902 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
17903 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
17904 default: gcc_unreachable ();
17906 tmp[3] = gen_rtx_LE (mode, two31r, val);
17907 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
17908 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
17910 if (intmode == V4SImode || TARGET_AVX2)
17911 *xorp = expand_simple_binop (intmode, ASHIFT,
17912 gen_lowpart (intmode, tmp[0]),
17913 GEN_INT (31), NULL_RTX, 0,
17917 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
17918 two31 = ix86_build_const_vector (intmode, 1, two31);
17919 *xorp = expand_simple_binop (intmode, AND,
17920 gen_lowpart (intmode, tmp[0]),
17921 two31, NULL_RTX, 0,
17924 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
17928 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17929 then replicate the value for all elements of the vector
17933 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
17937 enum machine_mode scalar_mode;
17954 n_elt = GET_MODE_NUNITS (mode);
17955 v = rtvec_alloc (n_elt);
17956 scalar_mode = GET_MODE_INNER (mode);
17958 RTVEC_ELT (v, 0) = value;
17960 for (i = 1; i < n_elt; ++i)
17961 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
17963 return gen_rtx_CONST_VECTOR (mode, v);
17966 gcc_unreachable ();
17970 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17971 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17972 for an SSE register. If VECT is true, then replicate the mask for
17973 all elements of the vector register. If INVERT is true, then create
17974 a mask excluding the sign bit. */
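/* E.g. for SFmode the mask is 0x80000000, or 0x7fffffff when INVERT
   is true; for DFmode it is 0x8000000000000000 resp. its
   complement.  */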
17977 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
17979 enum machine_mode vec_mode, imode;
17980 HOST_WIDE_INT hi, lo;
17985 /* Find the sign bit, sign extended to 2*HWI. */
17993 mode = GET_MODE_INNER (mode);
17995 lo = 0x80000000, hi = lo < 0;
18003 mode = GET_MODE_INNER (mode);
18005 if (HOST_BITS_PER_WIDE_INT >= 64)
18006 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18008 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18013 vec_mode = VOIDmode;
18014 if (HOST_BITS_PER_WIDE_INT >= 64)
18017 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18024 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18028 lo = ~lo, hi = ~hi;
18034 mask = immed_double_const (lo, hi, imode);
18036 vec = gen_rtvec (2, v, mask);
18037 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18038 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18045 gcc_unreachable ();
18049 lo = ~lo, hi = ~hi;
18051 /* Force this value into the low part of a fp vector constant. */
18052 mask = immed_double_const (lo, hi, imode);
18053 mask = gen_lowpart (mode, mask);
18055 if (vec_mode == VOIDmode)
18056 return force_reg (mode, mask);
18058 v = ix86_build_const_vector (vec_mode, vect, mask);
18059 return force_reg (vec_mode, v);
18062 /* Generate code for floating point ABS or NEG. */
18065 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
18068 rtx mask, set, dst, src;
18069 bool use_sse = false;
18070 bool vector_mode = VECTOR_MODE_P (mode);
18071 enum machine_mode vmode = mode;
18075 else if (mode == TFmode)
18077 else if (TARGET_SSE_MATH)
18079 use_sse = SSE_FLOAT_MODE_P (mode);
18080 if (mode == SFmode)
18082 else if (mode == DFmode)
18086 /* NEG and ABS performed with SSE use bitwise mask operations.
18087 Create the appropriate mask now. */
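/* With the mask in hand, NEG is an XOR with the sign-bit mask and
   ABS is an AND with the inverted mask.  */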
18089 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
18096 set = gen_rtx_fmt_e (code, mode, src);
18097 set = gen_rtx_SET (VOIDmode, dst, set);
18104 use = gen_rtx_USE (VOIDmode, mask);
18106 par = gen_rtvec (2, set, use);
18109 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18110 par = gen_rtvec (3, set, use, clob);
18112 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
18118 /* Expand a copysign operation. Special case operand 0 being a constant. */
18121 ix86_expand_copysign (rtx operands[])
18123 enum machine_mode mode, vmode;
18124 rtx dest, op0, op1, mask, nmask;
18126 dest = operands[0];
18130 mode = GET_MODE (dest);
18132 if (mode == SFmode)
18134 else if (mode == DFmode)
18139 if (GET_CODE (op0) == CONST_DOUBLE)
18141 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
18143 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
18144 op0 = simplify_unary_operation (ABS, mode, op0, mode);
18146 if (mode == SFmode || mode == DFmode)
18148 if (op0 == CONST0_RTX (mode))
18149 op0 = CONST0_RTX (vmode);
18152 rtx v = ix86_build_const_vector (vmode, false, op0);
18154 op0 = force_reg (vmode, v);
18157 else if (op0 != CONST0_RTX (mode))
18158 op0 = force_reg (mode, op0);
18160 mask = ix86_build_signbit_mask (vmode, 0, 0);
18162 if (mode == SFmode)
18163 copysign_insn = gen_copysignsf3_const;
18164 else if (mode == DFmode)
18165 copysign_insn = gen_copysigndf3_const;
18167 copysign_insn = gen_copysigntf3_const;
18169 emit_insn (copysign_insn (dest, op0, op1, mask));
18173 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
18175 nmask = ix86_build_signbit_mask (vmode, 0, 1);
18176 mask = ix86_build_signbit_mask (vmode, 0, 0);
18178 if (mode == SFmode)
18179 copysign_insn = gen_copysignsf3_var;
18180 else if (mode == DFmode)
18181 copysign_insn = gen_copysigndf3_var;
18183 copysign_insn = gen_copysigntf3_var;
18185 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
18189 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18190 be a constant, and so has already been expanded into a vector constant. */
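/* I.e. the code below computes dest = (dest & mask) | op0: the sign
   bit comes from the variable operand and the magnitude from the
   already-absolute constant.  */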
18193 ix86_split_copysign_const (rtx operands[])
18195 enum machine_mode mode, vmode;
18196 rtx dest, op0, mask, x;
18198 dest = operands[0];
18200 mask = operands[3];
18202 mode = GET_MODE (dest);
18203 vmode = GET_MODE (mask);
18205 dest = simplify_gen_subreg (vmode, dest, mode, 0);
18206 x = gen_rtx_AND (vmode, dest, mask);
18207 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18209 if (op0 != CONST0_RTX (vmode))
18211 x = gen_rtx_IOR (vmode, dest, op0);
18212 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18216 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18217 so we have to do two masks. */
18220 ix86_split_copysign_var (rtx operands[])
18222 enum machine_mode mode, vmode;
18223 rtx dest, scratch, op0, op1, mask, nmask, x;
18225 dest = operands[0];
18226 scratch = operands[1];
18229 nmask = operands[4];
18230 mask = operands[5];
18232 mode = GET_MODE (dest);
18233 vmode = GET_MODE (mask);
18235 if (rtx_equal_p (op0, op1))
18237 /* Shouldn't happen often (it's useless, obviously), but when it does
18238 we'd generate incorrect code if we continue below. */
18239 emit_move_insn (dest, op0);
18243 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
18245 gcc_assert (REGNO (op1) == REGNO (scratch));
18247 x = gen_rtx_AND (vmode, scratch, mask);
18248 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
18251 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
18252 x = gen_rtx_NOT (vmode, dest);
18253 x = gen_rtx_AND (vmode, x, op0);
18254 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18258 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
18260 x = gen_rtx_AND (vmode, scratch, mask);
18262 else /* alternative 2,4 */
18264 gcc_assert (REGNO (mask) == REGNO (scratch));
18265 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
18266 x = gen_rtx_AND (vmode, scratch, op1);
18268 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
18270 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
18272 dest = simplify_gen_subreg (vmode, op0, mode, 0);
18273 x = gen_rtx_AND (vmode, dest, nmask);
18275 else /* alternative 3,4 */
18277 gcc_assert (REGNO (nmask) == REGNO (dest));
18279 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
18280 x = gen_rtx_AND (vmode, dest, op0);
18282 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18285 x = gen_rtx_IOR (vmode, dest, scratch);
18286 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18289 /* Return TRUE or FALSE depending on whether the first SET in INSN
18290 has source and destination with matching CC modes, and that the
18291 CC mode is at least as constrained as REQ_MODE. */
18294 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
18297 enum machine_mode set_mode;
18299 set = PATTERN (insn);
18300 if (GET_CODE (set) == PARALLEL)
18301 set = XVECEXP (set, 0, 0);
18302 gcc_assert (GET_CODE (set) == SET);
18303 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
18305 set_mode = GET_MODE (SET_DEST (set));
18309 if (req_mode != CCNOmode
18310 && (req_mode != CCmode
18311 || XEXP (SET_SRC (set), 1) != const0_rtx))
18315 if (req_mode == CCGCmode)
18319 if (req_mode == CCGOCmode || req_mode == CCNOmode)
18323 if (req_mode == CCZmode)
18333 if (set_mode != req_mode)
18338 gcc_unreachable ();
18341 return GET_MODE (SET_SRC (set)) == set_mode;
18344 /* Generate insn patterns to do an integer compare of OPERANDS. */
18347 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
18349 enum machine_mode cmpmode;
18352 cmpmode = SELECT_CC_MODE (code, op0, op1);
18353 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
18355 /* This is very simple, but making the interface the same as in the
18356 FP case makes the rest of the code easier. */
18357 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
18358 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
18360 /* Return the test that should be put into the flags user, i.e.
18361 the bcc, scc, or cmov instruction. */
18362 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
18365 /* Figure out whether to use ordered or unordered fp comparisons.
18366 Return the appropriate mode to use. */
18369 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
/* ??? In order to make all comparisons reversible, we do all comparisons
non-trapping when compiling for IEEE.  Once gcc is able to distinguish
all forms of trapping and nontrapping comparisons, we can make inequality
comparisons trapping again, since it results in better code when using
FCOM based compares.  */
18376 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
18380 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
18382 enum machine_mode mode = GET_MODE (op0);
18384 if (SCALAR_FLOAT_MODE_P (mode))
18386 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18387 return ix86_fp_compare_mode (code);
18392 /* Only zero flag is needed. */
18393 case EQ: /* ZF=0 */
18394 case NE: /* ZF!=0 */
18396 /* Codes needing carry flag. */
18397 case GEU: /* CF=0 */
18398 case LTU: /* CF=1 */
18399 /* Detect overflow checks. They need just the carry flag. */
18400 if (GET_CODE (op0) == PLUS
18401 && rtx_equal_p (op1, XEXP (op0, 0)))
18405 case GTU: /* CF=0 & ZF=0 */
18406 case LEU: /* CF=1 | ZF=1 */
18407 /* Detect overflow checks. They need just the carry flag. */
18408 if (GET_CODE (op0) == MINUS
18409 && rtx_equal_p (op1, XEXP (op0, 0)))
18413 /* Codes possibly doable only with sign flag when
18414 comparing against zero. */
18415 case GE: /* SF=OF or SF=0 */
18416 case LT: /* SF<>OF or SF=1 */
18417 if (op1 == const0_rtx)
18420 /* For other cases Carry flag is not required. */
/* Codes doable only with sign flag when comparing
against zero, but we lack a jump instruction for it,
so we need to use relational tests against overflow,
which thus needs to be zero.  */
18426 case GT: /* ZF=0 & SF=OF */
18427 case LE: /* ZF=1 | SF<>OF */
18428 if (op1 == const0_rtx)
18432 /* strcmp pattern do (use flags) and combine may ask us for proper
18437 gcc_unreachable ();
18441 /* Return the fixed registers used for condition codes. */
18444 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
18451 /* If two condition code modes are compatible, return a condition code
18452 mode which is compatible with both. Otherwise, return
18455 static enum machine_mode
18456 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
18461 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
18464 if ((m1 == CCGCmode && m2 == CCGOCmode)
18465 || (m1 == CCGOCmode && m2 == CCGCmode))
18468 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
18470 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
18476 gcc_unreachable ();
18506 /* These are only compatible with themselves, which we already
/* Return a comparison we can do that is equivalent to
swap_condition (code), apart possibly from orderedness.
But never change orderedness if TARGET_IEEE_FP, returning
UNKNOWN in that case if necessary.  */
18518 static enum rtx_code
18519 ix86_fp_swap_condition (enum rtx_code code)
18523 case GT: /* GTU - CF=0 & ZF=0 */
18524 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
18525 case GE: /* GEU - CF=0 */
18526 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
18527 case UNLT: /* LTU - CF=1 */
18528 return TARGET_IEEE_FP ? UNKNOWN : GT;
18529 case UNLE: /* LEU - CF=1 | ZF=1 */
18530 return TARGET_IEEE_FP ? UNKNOWN : GE;
18532 return swap_condition (code);
/* Return the cost of comparison CODE using the best strategy for performance.
All the following functions use the number of instructions as a cost metric.
In the future this should be tweaked to compute bytes for optimize_size and
take into account the performance of various instructions on various CPUs.  */
18542 ix86_fp_comparison_cost (enum rtx_code code)
18546 /* The cost of code using bit-twiddling on %ah. */
18563 arith_cost = TARGET_IEEE_FP ? 5 : 4;
18567 arith_cost = TARGET_IEEE_FP ? 6 : 4;
18570 gcc_unreachable ();
18573 switch (ix86_fp_comparison_strategy (code))
18575 case IX86_FPCMP_COMI:
18576 return arith_cost > 4 ? 3 : 2;
18577 case IX86_FPCMP_SAHF:
18578 return arith_cost > 4 ? 4 : 3;
/* Return the strategy to use for floating-point comparisons.  We assume that
fcomi is always preferable where available, since that is also true when
looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
18588 enum ix86_fpcmp_strategy
18589 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
18591 /* Do fcomi/sahf based test when profitable. */
18594 return IX86_FPCMP_COMI;
18596 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
18597 return IX86_FPCMP_SAHF;
18599 return IX86_FPCMP_ARITH;
18602 /* Swap, force into registers, or otherwise massage the two operands
18603 to a fp comparison. The operands are updated in place; the new
18604 comparison code is returned. */
18606 static enum rtx_code
18607 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
18609 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
18610 rtx op0 = *pop0, op1 = *pop1;
18611 enum machine_mode op_mode = GET_MODE (op0);
18612 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
18614 /* All of the unordered compare instructions only work on registers.
18615 The same is true of the fcomi compare instructions. The XFmode
18616 compare instructions require registers except when comparing
18617 against zero or when converting operand 1 from fixed point to
18621 && (fpcmp_mode == CCFPUmode
18622 || (op_mode == XFmode
18623 && ! (standard_80387_constant_p (op0) == 1
18624 || standard_80387_constant_p (op1) == 1)
18625 && GET_CODE (op1) != FLOAT)
18626 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
18628 op0 = force_reg (op_mode, op0);
18629 op1 = force_reg (op_mode, op1);
18633 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18634 things around if they appear profitable, otherwise force op0
18635 into a register. */
18637 if (standard_80387_constant_p (op0) == 0
18639 && ! (standard_80387_constant_p (op1) == 0
18642 enum rtx_code new_code = ix86_fp_swap_condition (code);
18643 if (new_code != UNKNOWN)
18646 tmp = op0, op0 = op1, op1 = tmp;
18652 op0 = force_reg (op_mode, op0);
18654 if (CONSTANT_P (op1))
18656 int tmp = standard_80387_constant_p (op1);
18658 op1 = validize_mem (force_const_mem (op_mode, op1));
18662 op1 = force_reg (op_mode, op1);
18665 op1 = force_reg (op_mode, op1);
18669 /* Try to rearrange the comparison to make it cheaper. */
18670 if (ix86_fp_comparison_cost (code)
18671 > ix86_fp_comparison_cost (swap_condition (code))
18672 && (REG_P (op1) || can_create_pseudo_p ()))
18675 tmp = op0, op0 = op1, op1 = tmp;
18676 code = swap_condition (code);
18678 op0 = force_reg (op_mode, op0);
18686 /* Convert comparison codes we use to represent FP comparison to integer
18687 code that will result in proper branch. Return UNKNOWN if no such code
18691 ix86_fp_compare_code_to_integer (enum rtx_code code)
18720 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18723 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
18725 enum machine_mode fpcmp_mode, intcmp_mode;
18728 fpcmp_mode = ix86_fp_compare_mode (code);
18729 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
18731 /* Do fcomi/sahf based test when profitable. */
18732 switch (ix86_fp_comparison_strategy (code))
18734 case IX86_FPCMP_COMI:
18735 intcmp_mode = fpcmp_mode;
18736 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18737 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18742 case IX86_FPCMP_SAHF:
18743 intcmp_mode = fpcmp_mode;
18744 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18745 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18749 scratch = gen_reg_rtx (HImode);
18750 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
18751 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
18754 case IX86_FPCMP_ARITH:
18755 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18756 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18757 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
18759 scratch = gen_reg_rtx (HImode);
18760 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
18762 /* In the unordered case, we have to check C2 for NaN's, which
18763 doesn't happen to work out to anything nice combination-wise.
18764 So do some bit twiddling on the value we've got in AH to come
18765 up with an appropriate set of condition codes. */
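/* In AH, C0 is 0x01, C2 is 0x04 and C3 is 0x40, so e.g. the 0x45
   masks below test C0|C2|C3 at once.  */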
18767 intcmp_mode = CCNOmode;
18772 if (code == GT || !TARGET_IEEE_FP)
18774 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18779 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18780 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18781 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
18782 intcmp_mode = CCmode;
18788 if (code == LT && TARGET_IEEE_FP)
18790 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18791 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
18792 intcmp_mode = CCmode;
18797 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
18803 if (code == GE || !TARGET_IEEE_FP)
18805 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
18810 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18811 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
18817 if (code == LE && TARGET_IEEE_FP)
18819 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18820 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18821 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18822 intcmp_mode = CCmode;
18827 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18833 if (code == EQ && TARGET_IEEE_FP)
18835 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18836 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18837 intcmp_mode = CCmode;
18842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18848 if (code == NE && TARGET_IEEE_FP)
18850 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18851 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
18857 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18863 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18867 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18872 gcc_unreachable ();
18880 /* Return the test that should be put into the flags user, i.e.
18881 the bcc, scc, or cmov instruction. */
18882 return gen_rtx_fmt_ee (code, VOIDmode,
18883 gen_rtx_REG (intcmp_mode, FLAGS_REG),
18888 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
18892 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
18893 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
18895 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
18897 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
18898 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18901 ret = ix86_expand_int_compare (code, op0, op1);
18907 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
18909 enum machine_mode mode = GET_MODE (op0);
18921 tmp = ix86_expand_compare (code, op0, op1);
18922 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18923 gen_rtx_LABEL_REF (VOIDmode, label),
18925 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18932 /* Expand DImode branch into multiple compare+branch. */
18934 rtx lo[2], hi[2], label2;
18935 enum rtx_code code1, code2, code3;
18936 enum machine_mode submode;
18938 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
18940 tmp = op0, op0 = op1, op1 = tmp;
18941 code = swap_condition (code);
18944 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18945 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18947 submode = mode == DImode ? SImode : DImode;
18949 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18950 avoid two branches. This costs one extra insn, so disable when
18951 optimizing for size. */
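/* I.e. a == b can be tested as (hi(a)^hi(b)) | (lo(a)^lo(b)) == 0,
   needing a single compare-and-branch.  */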
18953 if ((code == EQ || code == NE)
18954 && (!optimize_insn_for_size_p ()
18955 || hi[1] == const0_rtx || lo[1] == const0_rtx))
18960 if (hi[1] != const0_rtx)
18961 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18962 NULL_RTX, 0, OPTAB_WIDEN);
18965 if (lo[1] != const0_rtx)
18966 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18967 NULL_RTX, 0, OPTAB_WIDEN);
18969 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18970 NULL_RTX, 0, OPTAB_WIDEN);
18972 ix86_expand_branch (code, tmp, const0_rtx, label);
/* Otherwise, if we are doing less-than or greater-or-equal-than,
and op1 is a constant whose low word is zero, then we can just
examine the high word.  Similarly for low word -1 and
less-or-equal-than or greater-than.  */
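/* E.g. a < 0x1234567800000000 holds iff hi(a) < 0x12345678, because
   the low word of the constant is zero.  */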
18981 if (CONST_INT_P (hi[1]))
18984 case LT: case LTU: case GE: case GEU:
18985 if (lo[1] == const0_rtx)
18987 ix86_expand_branch (code, hi[0], hi[1], label);
18991 case LE: case LEU: case GT: case GTU:
18992 if (lo[1] == constm1_rtx)
18994 ix86_expand_branch (code, hi[0], hi[1], label);
19002 /* Otherwise, we need two or three jumps. */
19004 label2 = gen_label_rtx ();
19007 code2 = swap_condition (code);
19008 code3 = unsigned_condition (code);
19012 case LT: case GT: case LTU: case GTU:
19015 case LE: code1 = LT; code2 = GT; break;
19016 case GE: code1 = GT; code2 = LT; break;
19017 case LEU: code1 = LTU; code2 = GTU; break;
19018 case GEU: code1 = GTU; code2 = LTU; break;
19020 case EQ: code1 = UNKNOWN; code2 = NE; break;
19021 case NE: code2 = UNKNOWN; break;
19024 gcc_unreachable ();
19029 * if (hi(a) < hi(b)) goto true;
19030 * if (hi(a) > hi(b)) goto false;
19031 * if (lo(a) < lo(b)) goto true;
19035 if (code1 != UNKNOWN)
19036 ix86_expand_branch (code1, hi[0], hi[1], label);
19037 if (code2 != UNKNOWN)
19038 ix86_expand_branch (code2, hi[0], hi[1], label2);
19040 ix86_expand_branch (code3, lo[0], lo[1], label);
19042 if (code2 != UNKNOWN)
19043 emit_label (label2);
19048 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19053 /* Split branch based on floating point condition. */
19055 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19056 rtx target1, rtx target2, rtx tmp, rtx pushed)
19061 if (target2 != pc_rtx)
19064 code = reverse_condition_maybe_unordered (code);
19069 condition = ix86_expand_fp_compare (code, op1, op2,
19072 /* Remove pushed operand from stack. */
19074 ix86_free_from_memory (GET_MODE (pushed));
19076 i = emit_jump_insn (gen_rtx_SET
19078 gen_rtx_IF_THEN_ELSE (VOIDmode,
19079 condition, target1, target2)));
19080 if (split_branch_probability >= 0)
19081 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
19085 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
19089 gcc_assert (GET_MODE (dest) == QImode);
19091 ret = ix86_expand_compare (code, op0, op1);
19092 PUT_MODE (ret, QImode);
19093 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
19096 /* Expand comparison setting or clearing carry flag. Return true when
19097 successful and set pop for the operation. */
19099 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
19101 enum machine_mode mode =
19102 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
/* Do not handle double-mode compares that go through the special path.  */
19105 if (mode == (TARGET_64BIT ? TImode : DImode))
19108 if (SCALAR_FLOAT_MODE_P (mode))
19110 rtx compare_op, compare_seq;
19112 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
/* Shortcut: the following common codes never translate
into carry flag compares.  */
19116 if (code == EQ || code == NE || code == UNEQ || code == LTGT
19117 || code == ORDERED || code == UNORDERED)
19120 /* These comparisons require zero flag; swap operands so they won't. */
19121 if ((code == GT || code == UNLE || code == LE || code == UNGT)
19122 && !TARGET_IEEE_FP)
19127 code = swap_condition (code);
/* Try to expand the comparison and verify that we end up with
a carry flag based comparison.  This fails to be true only when
we decide to expand the comparison using arithmetic, which is
not a very common scenario.  */
19135 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19136 compare_seq = get_insns ();
19139 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
19140 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
19141 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
19143 code = GET_CODE (compare_op);
19145 if (code != LTU && code != GEU)
19148 emit_insn (compare_seq);
19153 if (!INTEGRAL_MODE_P (mode))
19162 /* Convert a==0 into (unsigned)a<1. */
19165 if (op1 != const0_rtx)
19168 code = (code == EQ ? LTU : GEU);
/* Convert a>b into b<a or a>=b+1.  */
19174 if (CONST_INT_P (op1))
19176 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
/* Bail out on overflow.  We still can swap operands but that
would force loading of the constant into a register.  */
19179 if (op1 == const0_rtx
19180 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
19182 code = (code == GTU ? GEU : LTU);
19189 code = (code == GTU ? LTU : GEU);
19193 /* Convert a>=0 into (unsigned)a<0x80000000. */
19196 if (mode == DImode || op1 != const0_rtx)
19198 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
19199 code = (code == LT ? GEU : LTU);
19203 if (mode == DImode || op1 != constm1_rtx)
19205 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
19206 code = (code == LE ? GEU : LTU);
/* Swapping operands may cause a constant to appear as the first operand.  */
19213 if (!nonimmediate_operand (op0, VOIDmode))
19215 if (!can_create_pseudo_p ())
19217 op0 = force_reg (mode, op0);
19219 *pop = ix86_expand_compare (code, op0, op1);
19220 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
19225 ix86_expand_int_movcc (rtx operands[])
19227 enum rtx_code code = GET_CODE (operands[1]), compare_code;
19228 rtx compare_seq, compare_op;
19229 enum machine_mode mode = GET_MODE (operands[0]);
19230 bool sign_bit_compare_p = false;
19231 rtx op0 = XEXP (operands[1], 0);
19232 rtx op1 = XEXP (operands[1], 1);
19234 if (GET_MODE (op0) == TImode
19235 || (GET_MODE (op0) == DImode
19240 compare_op = ix86_expand_compare (code, op0, op1);
19241 compare_seq = get_insns ();
19244 compare_code = GET_CODE (compare_op);
19246 if ((op1 == const0_rtx && (code == GE || code == LT))
19247 || (op1 == constm1_rtx && (code == GT || code == LE)))
19248 sign_bit_compare_p = true;
19250 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19251 HImode insns, we'd be swallowed in word prefix ops. */
19253 if ((mode != HImode || TARGET_FAST_PREFIX)
19254 && (mode != (TARGET_64BIT ? TImode : DImode))
19255 && CONST_INT_P (operands[2])
19256 && CONST_INT_P (operands[3]))
19258 rtx out = operands[0];
19259 HOST_WIDE_INT ct = INTVAL (operands[2]);
19260 HOST_WIDE_INT cf = INTVAL (operands[3]);
19261 HOST_WIDE_INT diff;
19264 /* Sign bit compares are better done using shifts than we do by using
19266 if (sign_bit_compare_p
19267 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19269 /* Detect overlap between destination and compare sources. */
19272 if (!sign_bit_compare_p)
19275 bool fpcmp = false;
19277 compare_code = GET_CODE (compare_op);
19279 flags = XEXP (compare_op, 0);
19281 if (GET_MODE (flags) == CCFPmode
19282 || GET_MODE (flags) == CCFPUmode)
19286 = ix86_fp_compare_code_to_integer (compare_code);
/* To simplify the rest of the code, restrict to the GEU case.  */
19290 if (compare_code == LTU)
19292 HOST_WIDE_INT tmp = ct;
19295 compare_code = reverse_condition (compare_code);
19296 code = reverse_condition (code);
19301 PUT_CODE (compare_op,
19302 reverse_condition_maybe_unordered
19303 (GET_CODE (compare_op)));
19305 PUT_CODE (compare_op,
19306 reverse_condition (GET_CODE (compare_op)));
19310 if (reg_overlap_mentioned_p (out, op0)
19311 || reg_overlap_mentioned_p (out, op1))
19312 tmp = gen_reg_rtx (mode);
19314 if (mode == DImode)
19315 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
19317 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
19318 flags, compare_op));
19322 if (code == GT || code == GE)
19323 code = reverse_condition (code);
19326 HOST_WIDE_INT tmp = ct;
19331 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
19344 tmp = expand_simple_binop (mode, PLUS,
19346 copy_rtx (tmp), 1, OPTAB_DIRECT);
19357 tmp = expand_simple_binop (mode, IOR,
19359 copy_rtx (tmp), 1, OPTAB_DIRECT);
19361 else if (diff == -1 && ct)
19371 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
19373 tmp = expand_simple_binop (mode, PLUS,
19374 copy_rtx (tmp), GEN_INT (cf),
19375 copy_rtx (tmp), 1, OPTAB_DIRECT);
19383 * andl cf - ct, dest
19393 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
19396 tmp = expand_simple_binop (mode, AND,
19398 gen_int_mode (cf - ct, mode),
19399 copy_rtx (tmp), 1, OPTAB_DIRECT);
19401 tmp = expand_simple_binop (mode, PLUS,
19402 copy_rtx (tmp), GEN_INT (ct),
19403 copy_rtx (tmp), 1, OPTAB_DIRECT);
19406 if (!rtx_equal_p (tmp, out))
19407 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
19414 enum machine_mode cmp_mode = GET_MODE (op0);
19417 tmp = ct, ct = cf, cf = tmp;
19420 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19422 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
/* We may be reversing an unordered compare to a normal compare, which
is not valid in general (we may convert a non-trapping condition
to a trapping one), however on i386 we currently emit all
comparisons unordered.  */
19428 compare_code = reverse_condition_maybe_unordered (compare_code);
19429 code = reverse_condition_maybe_unordered (code);
19433 compare_code = reverse_condition (compare_code);
19434 code = reverse_condition (code);
19438 compare_code = UNKNOWN;
19439 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
19440 && CONST_INT_P (op1))
19442 if (op1 == const0_rtx
19443 && (code == LT || code == GE))
19444 compare_code = code;
19445 else if (op1 == constm1_rtx)
19449 else if (code == GT)
19454 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19455 if (compare_code != UNKNOWN
19456 && GET_MODE (op0) == GET_MODE (out)
19457 && (cf == -1 || ct == -1))
/* If the lea code below could be used, only optimize
if it results in a 2-insn sequence.  */
19462 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
19463 || diff == 3 || diff == 5 || diff == 9)
19464 || (compare_code == LT && ct == -1)
19465 || (compare_code == GE && cf == -1))
19468 * notl op1 (if necessary)
19476 code = reverse_condition (code);
19479 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19481 out = expand_simple_binop (mode, IOR,
19483 out, 1, OPTAB_DIRECT);
19484 if (out != operands[0])
19485 emit_move_insn (operands[0], out);
19492 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
19493 || diff == 3 || diff == 5 || diff == 9)
19494 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
19496 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
19502 * lea cf(dest*(ct-cf)),dest
19506 * This also catches the degenerate setcc-only case.
19512 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
/* On x86_64 the lea instruction operates on Pmode, so we need
to get the arithmetic done in the proper mode to match.  */
19518 tmp = copy_rtx (out);
19522 out1 = copy_rtx (out);
19523 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
19527 tmp = gen_rtx_PLUS (mode, tmp, out1);
19533 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
19536 if (!rtx_equal_p (tmp, out))
19539 out = force_operand (tmp, copy_rtx (out));
19541 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
19543 if (!rtx_equal_p (out, operands[0]))
19544 emit_move_insn (operands[0], copy_rtx (out));
19550 * General case: Jumpful:
19551 * xorl dest,dest cmpl op1, op2
19552 * cmpl op1, op2 movl ct, dest
19553 * setcc dest jcc 1f
19554 * decl dest movl cf, dest
19555 * andl (cf-ct),dest 1:
19558 * Size 20. Size 14.
19560 * This is reasonably steep, but branch mispredict costs are
19561 * high on modern cpus, so consider failing only if optimizing
19565 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19566 && BRANCH_COST (optimize_insn_for_speed_p (),
19571 enum machine_mode cmp_mode = GET_MODE (op0);
19576 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19578 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
/* We may be reversing an unordered compare to a normal compare,
which is not valid in general (we may convert a non-trapping
condition to a trapping one), however on i386 we currently
emit all comparisons unordered.  */
19584 code = reverse_condition_maybe_unordered (code);
19588 code = reverse_condition (code);
19589 if (compare_code != UNKNOWN)
19590 compare_code = reverse_condition (compare_code);
19594 if (compare_code != UNKNOWN)
19596 /* notl op1 (if needed)
19601 For x < 0 (resp. x <= -1) there will be no notl,
19602 so if possible swap the constants to get rid of the
19604 True/false will be -1/0 while code below (store flag
19605 followed by decrement) is 0/-1, so the constants need
19606 to be exchanged once more. */
19608 if (compare_code == GE || !cf)
19610 code = reverse_condition (code);
19615 HOST_WIDE_INT tmp = cf;
19620 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19624 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19626 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
19628 copy_rtx (out), 1, OPTAB_DIRECT);
19631 out = expand_simple_binop (mode, AND, copy_rtx (out),
19632 gen_int_mode (cf - ct, mode),
19633 copy_rtx (out), 1, OPTAB_DIRECT);
19635 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
19636 copy_rtx (out), 1, OPTAB_DIRECT);
19637 if (!rtx_equal_p (out, operands[0]))
19638 emit_move_insn (operands[0], copy_rtx (out));
19644 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
/* Try a few more things with specific constants and a variable.  */
19649 rtx var, orig_out, out, tmp;
19651 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19654 /* If one of the two operands is an interesting constant, load a
19655 constant with the above and mask it in with a logical operation. */
19657 if (CONST_INT_P (operands[2]))
19660 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
19661 operands[3] = constm1_rtx, op = and_optab;
19662 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
19663 operands[3] = const0_rtx, op = ior_optab;
19667 else if (CONST_INT_P (operands[3]))
19670 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
19671 operands[2] = constm1_rtx, op = and_optab;
else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
19673 operands[2] = const0_rtx, op = ior_optab;
19680 orig_out = operands[0];
19681 tmp = gen_reg_rtx (mode);
19684 /* Recurse to get the constant loaded. */
19685 if (ix86_expand_int_movcc (operands) == 0)
19688 /* Mask in the interesting variable. */
19689 out = expand_binop (mode, op, var, tmp, orig_out, 0,
19691 if (!rtx_equal_p (out, orig_out))
19692 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
19698 * For comparison with above,
19708 if (! nonimmediate_operand (operands[2], mode))
19709 operands[2] = force_reg (mode, operands[2]);
19710 if (! nonimmediate_operand (operands[3], mode))
19711 operands[3] = force_reg (mode, operands[3]);
19713 if (! register_operand (operands[2], VOIDmode)
19715 || ! register_operand (operands[3], VOIDmode)))
19716 operands[2] = force_reg (mode, operands[2]);
19719 && ! register_operand (operands[3], VOIDmode))
19720 operands[3] = force_reg (mode, operands[3]);
19722 emit_insn (compare_seq);
19723 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19724 gen_rtx_IF_THEN_ELSE (mode,
19725 compare_op, operands[2],
19730 /* Swap, force into registers, or otherwise massage the two operands
19731 to an sse comparison with a mask result. Thus we differ a bit from
19732 ix86_prepare_fp_compare_args which expects to produce a flags result.
19734 The DEST operand exists to help determine whether to commute commutative
19735 operators. The POP0/POP1 operands are updated in place. The new
19736 comparison code is returned, or UNKNOWN if not implementable. */
19738 static enum rtx_code
19739 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
19740 rtx *pop0, rtx *pop1)
19748 /* AVX supports all the needed comparisons. */
19751 /* We have no LTGT as an operator. We could implement it with
19752 NE & ORDERED, but this requires an extra temporary. It's
19753 not clear that it's worth it. */
19760 /* These are supported directly. */
/* AVX has 3-operand comparisons, no need to swap anything.  */
19770 /* For commutative operators, try to canonicalize the destination
19771 operand to be first in the comparison - this helps reload to
19772 avoid extra moves. */
19773 if (!dest || !rtx_equal_p (dest, *pop1))
19781 /* These are not supported directly before AVX, and furthermore
19782 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19783 comparison operands to transform into something that is
19788 code = swap_condition (code);
19792 gcc_unreachable ();
19798 /* Detect conditional moves that exactly match min/max operational
19799 semantics. Note that this is IEEE safe, as long as we don't
19800 interchange the operands.
19802 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19803 and TRUE if the operation is successful and instructions are emitted. */
19806 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
19807 rtx cmp_op1, rtx if_true, rtx if_false)
19809 enum machine_mode mode;
19815 else if (code == UNGE)
19818 if_true = if_false;
19824 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
19826 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
19831 mode = GET_MODE (dest);
19833 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19834 but MODE may be a vector mode and thus not appropriate. */
19835 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
19837 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
19840 if_true = force_reg (mode, if_true);
19841 v = gen_rtvec (2, if_true, if_false);
19842 tmp = gen_rtx_UNSPEC (mode, v, u);
19846 code = is_min ? SMIN : SMAX;
19847 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
19850 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
19854 /* Expand an sse vector comparison. Return the register with the result. */
19857 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
19858 rtx op_true, rtx op_false)
19860 enum machine_mode mode = GET_MODE (dest);
19861 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
19864 cmp_op0 = force_reg (cmp_mode, cmp_op0);
19865 if (!nonimmediate_operand (cmp_op1, cmp_mode))
19866 cmp_op1 = force_reg (cmp_mode, cmp_op1);
19869 || reg_overlap_mentioned_p (dest, op_true)
19870 || reg_overlap_mentioned_p (dest, op_false))
19871 dest = gen_reg_rtx (mode);
19873 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
19874 if (cmp_mode != mode)
19876 x = force_reg (cmp_mode, x);
19877 convert_move (dest, x, false);
19880 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19885 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19886 operations. This is used for both scalar and vector conditional moves. */
19889 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
19891 enum machine_mode mode = GET_MODE (dest);
19894 if (vector_all_ones_operand (op_true, mode)
19895 && rtx_equal_p (op_false, CONST0_RTX (mode)))
19897 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
19899 else if (op_false == CONST0_RTX (mode))
19901 op_true = force_reg (mode, op_true);
19902 x = gen_rtx_AND (mode, cmp, op_true);
19903 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19905 else if (op_true == CONST0_RTX (mode))
19907 op_false = force_reg (mode, op_false);
19908 x = gen_rtx_NOT (mode, cmp);
19909 x = gen_rtx_AND (mode, x, op_false);
19910 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19912 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
19914 op_false = force_reg (mode, op_false);
19915 x = gen_rtx_IOR (mode, cmp, op_false);
19916 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19918 else if (TARGET_XOP)
19920 op_true = force_reg (mode, op_true);
19922 if (!nonimmediate_operand (op_false, mode))
19923 op_false = force_reg (mode, op_false);
19925 emit_insn (gen_rtx_SET (mode, dest,
19926 gen_rtx_IF_THEN_ELSE (mode, cmp,
19932 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
19934 if (!nonimmediate_operand (op_true, mode))
19935 op_true = force_reg (mode, op_true);
19937 op_false = force_reg (mode, op_false);
19943 gen = gen_sse4_1_blendvps;
19947 gen = gen_sse4_1_blendvpd;
19955 gen = gen_sse4_1_pblendvb;
19956 dest = gen_lowpart (V16QImode, dest);
19957 op_false = gen_lowpart (V16QImode, op_false);
19958 op_true = gen_lowpart (V16QImode, op_true);
19959 cmp = gen_lowpart (V16QImode, cmp);
19964 gen = gen_avx_blendvps256;
19968 gen = gen_avx_blendvpd256;
19976 gen = gen_avx2_pblendvb;
19977 dest = gen_lowpart (V32QImode, dest);
19978 op_false = gen_lowpart (V32QImode, op_false);
19979 op_true = gen_lowpart (V32QImode, op_true);
19980 cmp = gen_lowpart (V32QImode, cmp);
19988 emit_insn (gen (dest, op_false, op_true, cmp));
19991 op_true = force_reg (mode, op_true);
19993 t2 = gen_reg_rtx (mode);
19995 t3 = gen_reg_rtx (mode);
19999 x = gen_rtx_AND (mode, op_true, cmp);
20000 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
20002 x = gen_rtx_NOT (mode, cmp);
20003 x = gen_rtx_AND (mode, x, op_false);
20004 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
20006 x = gen_rtx_IOR (mode, t3, t2);
20007 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
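/* Editor's illustrative sketch, not part of the original file: the
   bitwise blend identity the AND/ANDN/IOR fallback above implements,
   assuming CMP is an all-ones or all-zeros mask in each element.  */

static unsigned int
mask_blend (unsigned int cmp, unsigned int op_true, unsigned int op_false)
{
  return (cmp & op_true) | (~cmp & op_false);
}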
20012 /* Expand a floating-point conditional move. Return true if successful. */
20015 ix86_expand_fp_movcc (rtx operands[])
20017 enum machine_mode mode = GET_MODE (operands[0]);
20018 enum rtx_code code = GET_CODE (operands[1]);
20019 rtx tmp, compare_op;
20020 rtx op0 = XEXP (operands[1], 0);
20021 rtx op1 = XEXP (operands[1], 1);
20023 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20025 enum machine_mode cmode;
20027 /* Since we have no cmove for sse registers, don't force bad register
20028 allocation just to gain access to it. Deny movcc when the
20029 comparison mode doesn't match the move mode. */
20030 cmode = GET_MODE (op0);
20031 if (cmode == VOIDmode)
20032 cmode = GET_MODE (op1);
20036 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
20037 if (code == UNKNOWN)
20040 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
20041 operands[2], operands[3]))
20044 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
20045 operands[2], operands[3]);
20046 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
20050 /* The floating point conditional move instructions don't directly
20051 support conditions resulting from a signed integer comparison. */
20053 compare_op = ix86_expand_compare (code, op0, op1);
20054 if (!fcmov_comparison_operator (compare_op, VOIDmode))
20056 tmp = gen_reg_rtx (QImode);
20057 ix86_expand_setcc (tmp, code, op0, op1);
20059 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
20062 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20063 gen_rtx_IF_THEN_ELSE (mode, compare_op,
20064 operands[2], operands[3])));
20069 /* Expand a floating-point vector conditional move; a vcond operation
20070 rather than a movcc operation. */
20073 ix86_expand_fp_vcond (rtx operands[])
20075 enum rtx_code code = GET_CODE (operands[3]);
20078 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
20079 &operands[4], &operands[5]);
20080 if (code == UNKNOWN)
20083 switch (GET_CODE (operands[3]))
20086 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
20087 operands[5], operands[0], operands[0]);
20088 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
20089 operands[5], operands[1], operands[2]);
20093 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
20094 operands[5], operands[0], operands[0]);
20095 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
20096 operands[5], operands[1], operands[2]);
20100 gcc_unreachable ();
20102 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
20104 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
20108 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
20109 operands[5], operands[1], operands[2]))
20112 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
20113 operands[1], operands[2]);
20114 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
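/* Editor's illustrative sketch, not part of the original file: the
   decomposition used above for predicates with no direct SSE encoding,
     LTGT (a, b) = ORDERED (a, b) & NE (a, b)
     UNEQ (a, b) = UNORDERED (a, b) | EQ (a, b),
   shown element-wise on scalars.  */

static int
ltgt_scalar (double a, double b)
{
  int ordered = a == a && b == b;	/* neither operand is a NaN */
  return ordered && (a < b || a > b);
}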
20118 /* Expand a signed/unsigned integral vector conditional move. */
20121 ix86_expand_int_vcond (rtx operands[])
20123 enum machine_mode data_mode = GET_MODE (operands[0]);
20124 enum machine_mode mode = GET_MODE (operands[4]);
20125 enum rtx_code code = GET_CODE (operands[3]);
20126 bool negate = false;
20129 cop0 = operands[4];
20130 cop1 = operands[5];
20132 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20133 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20134 if ((code == LT || code == GE)
20135 && data_mode == mode
20136 && cop1 == CONST0_RTX (mode)
20137 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
20138 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
20139 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
20140 && (GET_MODE_SIZE (data_mode) == 16
20141 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
20143 rtx negop = operands[2 - (code == LT)];
20144 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
20145 if (negop == CONST1_RTX (data_mode))
20147 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
20148 operands[0], 1, OPTAB_DIRECT);
20149 if (res != operands[0])
20150 emit_move_insn (operands[0], res);
20153 else if (GET_MODE_INNER (data_mode) != DImode
20154 && vector_all_ones_operand (negop, data_mode))
20156 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
20157 operands[0], 0, OPTAB_DIRECT);
20158 if (res != operands[0])
20159 emit_move_insn (operands[0], res);
20164 if (!nonimmediate_operand (cop1, mode))
20165 cop1 = force_reg (mode, cop1);
20166 if (!general_operand (operands[1], data_mode))
20167 operands[1] = force_reg (data_mode, operands[1]);
20168 if (!general_operand (operands[2], data_mode))
20169 operands[2] = force_reg (data_mode, operands[2]);
20171 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20173 && (mode == V16QImode || mode == V8HImode
20174 || mode == V4SImode || mode == V2DImode))
20178 /* Canonicalize the comparison to EQ, GT, GTU. */
20189 code = reverse_condition (code);
20195 code = reverse_condition (code);
20201 code = swap_condition (code);
20202 x = cop0, cop0 = cop1, cop1 = x;
20206 gcc_unreachable ();
20209 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20210 if (mode == V2DImode)
20215 /* SSE4.1 supports EQ. */
20216 if (!TARGET_SSE4_1)
20222 /* SSE4.2 supports GT/GTU. */
20223 if (!TARGET_SSE4_2)
20228 gcc_unreachable ();
20232 /* Unsigned parallel compare is not supported by the hardware.
20233 Play some tricks to turn this into a signed comparison
20234 against 0. */
20237 cop0 = force_reg (mode, cop0);
20247 rtx (*gen_sub3) (rtx, rtx, rtx);
20251 case V8SImode: gen_sub3 = gen_subv8si3; break;
20252 case V4DImode: gen_sub3 = gen_subv4di3; break;
20253 case V4SImode: gen_sub3 = gen_subv4si3; break;
20254 case V2DImode: gen_sub3 = gen_subv2di3; break;
20256 gcc_unreachable ();
20258 /* Subtract (-(INT MAX) - 1) from both operands to make
20259 them signed. */
20260 mask = ix86_build_signbit_mask (mode, true, false);
20261 t1 = gen_reg_rtx (mode);
20262 emit_insn (gen_sub3 (t1, cop0, mask));
20264 t2 = gen_reg_rtx (mode);
20265 emit_insn (gen_sub3 (t2, cop1, mask));
20277 /* Perform a parallel unsigned saturating subtraction. */
20278 x = gen_reg_rtx (mode);
20279 emit_insn (gen_rtx_SET (VOIDmode, x,
20280 gen_rtx_US_MINUS (mode, cop0, cop1)));
20283 cop1 = CONST0_RTX (mode);
20289 gcc_unreachable ();
20294 /* Allow the comparison to be done in one mode, but the movcc to
20295 happen in another mode. */
20296 if (data_mode == mode)
20298 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
20299 operands[1+negate], operands[2-negate]);
20303 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
20304 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
20306 operands[1+negate], operands[2-negate]);
20307 x = gen_lowpart (data_mode, x);
20310 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
20311 operands[2-negate]);
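/* Editor's illustrative sketches, not part of the original file, of the
   scalar identities used above.  The signed shifts and casts rely on
   the two's-complement, arithmetic-shift behavior the targets of this
   file provide; ISO C leaves them implementation-defined.  */

static int
lt_zero_all_ones (int x)
{
  return x >> 31;		/* x < 0 ? -1 : 0, a PSRAD-style shift.  */
}

static int
gtu_via_signed_bias (unsigned int a, unsigned int b)
{
  /* Subtracting the sign bit maps unsigned order onto signed order,
     so GTU becomes GT once both operands are biased.  */
  return (int) (a - 0x80000000u) > (int) (b - 0x80000000u);
}

static int
leu_via_saturating_sub (unsigned char a, unsigned char b)
{
  /* An unsigned saturating subtraction is zero iff a <= b; PSUBUS
     plus a compare against zero expresses unsigned orderings.  */
  unsigned char sat = a > b ? (unsigned char) (a - b) : 0;
  return sat == 0;
}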
20315 /* Expand a variable vector permutation. */
20318 ix86_expand_vec_perm (rtx operands[])
20320 rtx target = operands[0];
20321 rtx op0 = operands[1];
20322 rtx op1 = operands[2];
20323 rtx mask = operands[3];
20324 rtx t1, t2, t3, t4, vt, vt2, vec[32];
20325 enum machine_mode mode = GET_MODE (op0);
20326 enum machine_mode maskmode = GET_MODE (mask);
20328 bool one_operand_shuffle = rtx_equal_p (op0, op1);
20330 /* Number of elements in the vector. */
20331 w = GET_MODE_NUNITS (mode);
20332 e = GET_MODE_UNIT_SIZE (mode);
20333 gcc_assert (w <= 32);
20337 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
20339 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20340 a constant shuffle operand. With a tiny bit of effort we can
20341 use VPERMD instead. A re-interpretation stall for V4DFmode is
20342 unfortunate but there's no avoiding it.
20343 Similarly for V16HImode we don't have instructions for variable
20344 shuffling, while for V32QImode we can, after preparing suitable
20345 masks, use vpshufb; vpshufb; vpermq; vpor. */
20347 if (mode == V16HImode)
20349 maskmode = mode = V32QImode;
20355 maskmode = mode = V8SImode;
20359 t1 = gen_reg_rtx (maskmode);
20361 /* Replicate the low bits of the V4DImode mask into V8SImode:
20362 mask = { A B C D }
20363 t1 = { A A B B C C D D }. */
20364 for (i = 0; i < w / 2; ++i)
20365 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
20366 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20367 vt = force_reg (maskmode, vt);
20368 mask = gen_lowpart (maskmode, mask);
20369 if (maskmode == V8SImode)
20370 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
20372 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
20374 /* Multiply the shuffle indices by two. */
20375 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
20378 /* Add one to the odd shuffle indices:
20379 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20380 for (i = 0; i < w / 2; ++i)
20382 vec[i * 2] = const0_rtx;
20383 vec[i * 2 + 1] = const1_rtx;
20385 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20386 vt = force_const_mem (maskmode, vt);
20387 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
20390 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20391 operands[3] = mask = t1;
20392 target = gen_lowpart (mode, target);
20393 op0 = gen_lowpart (mode, op0);
20394 op1 = gen_lowpart (mode, op1);
20400 /* The VPERMD and VPERMPS instructions already properly ignore
20401 the high bits of the shuffle elements. No need for us to
20402 perform an AND ourselves. */
20403 if (one_operand_shuffle)
20404 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
20407 t1 = gen_reg_rtx (V8SImode);
20408 t2 = gen_reg_rtx (V8SImode);
20409 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
20410 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
20416 mask = gen_lowpart (V8SFmode, mask);
20417 if (one_operand_shuffle)
20418 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
20421 t1 = gen_reg_rtx (V8SFmode);
20422 t2 = gen_reg_rtx (V8SFmode);
20423 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
20424 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
20430 /* By combining the two 128-bit input vectors into one 256-bit
20431 input vector, we can use VPERMD and VPERMPS for the full
20432 two-operand shuffle. */
20433 t1 = gen_reg_rtx (V8SImode);
20434 t2 = gen_reg_rtx (V8SImode);
20435 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
20436 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20437 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
20438 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
20442 t1 = gen_reg_rtx (V8SFmode);
20443 t2 = gen_reg_rtx (V8SImode);
20444 mask = gen_lowpart (V4SImode, mask);
20445 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
20446 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20447 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
20448 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
20452 t1 = gen_reg_rtx (V32QImode);
20453 t2 = gen_reg_rtx (V32QImode);
20454 t3 = gen_reg_rtx (V32QImode);
20455 vt2 = GEN_INT (128);
20456 for (i = 0; i < 32; i++)
20458 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20459 vt = force_reg (V32QImode, vt);
20460 for (i = 0; i < 32; i++)
20461 vec[i] = i < 16 ? vt2 : const0_rtx;
20462 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20463 vt2 = force_reg (V32QImode, vt2);
20464 /* From mask create two adjusted masks, which contain the same
20465 bits as mask in the low 7 bits of each vector element.
20466 The first mask will have the most significant bit clear
20467 if it requests an element from the same 128-bit lane
20468 and the MSB set if it requests an element from the other 128-bit lane.
20469 The second mask will have the opposite values of the MSB,
20470 and additionally will have its 128-bit lanes swapped.
20471 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20472 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20473 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20474 stands for the other 12 bytes. */
20475 /* The bit that tells whether an element is from the same lane or the
20476 other lane is bit 4, so shift it up by 3 to the MSB position. */
20477 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
20478 gen_lowpart (V4DImode, mask),
20480 /* Clear MSB bits from the mask just in case it had them set. */
20481 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
20482 /* After this t1 will have MSB set for elements from the other lane. */
20483 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
20484 /* Clear bits other than MSB. */
20485 emit_insn (gen_andv32qi3 (t1, t1, vt));
20486 /* Or in the lower bits from mask into t3. */
20487 emit_insn (gen_iorv32qi3 (t3, t1, t2));
20488 /* And invert MSB bits in t1, so MSB is set for elements from the same
20489 lane. */
20490 emit_insn (gen_xorv32qi3 (t1, t1, vt));
20491 /* Swap 128-bit lanes in t3. */
20492 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20493 gen_lowpart (V4DImode, t3),
20494 const2_rtx, GEN_INT (3),
20495 const0_rtx, const1_rtx));
20496 /* And or in the lower bits from mask into t1. */
20497 emit_insn (gen_iorv32qi3 (t1, t1, t2));
20498 if (one_operand_shuffle)
20500 /* Each of these shuffles will put 0s in places where an
20501 element from the other 128-bit lane is needed; otherwise
20502 it will shuffle in the requested value. */
20503 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
20504 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
20505 /* For t3 the 128-bit lanes are swapped again. */
20506 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20507 gen_lowpart (V4DImode, t3),
20508 const2_rtx, GEN_INT (3),
20509 const0_rtx, const1_rtx));
20510 /* And ORing both together leads to the result. */
20511 emit_insn (gen_iorv32qi3 (target, t1, t3));
20515 t4 = gen_reg_rtx (V32QImode);
20516 /* Similarly to the above one_operand_shuffle code,
20517 just repeated twice for each operand. The code at
20518 merge_two: below will merge the two results together. */
20519 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
20520 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
20521 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
20522 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
20523 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
20524 gen_lowpart (V4DImode, t4),
20525 const2_rtx, GEN_INT (3),
20526 const0_rtx, const1_rtx));
20527 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20528 gen_lowpart (V4DImode, t3),
20529 const2_rtx, GEN_INT (3),
20530 const0_rtx, const1_rtx));
20531 emit_insn (gen_iorv32qi3 (t4, t2, t4));
20532 emit_insn (gen_iorv32qi3 (t3, t1, t3));
20538 gcc_assert (GET_MODE_SIZE (mode) <= 16);
20545 /* The XOP VPPERM insn supports three inputs. By ignoring the
20546 one_operand_shuffle special case, we avoid creating another
20547 set of constant vectors in memory. */
20548 one_operand_shuffle = false;
20550 /* mask = mask & {2*w-1, ...} */
20551 vt = GEN_INT (2*w - 1);
20555 /* mask = mask & {w-1, ...} */
20556 vt = GEN_INT (w - 1);
20559 for (i = 0; i < w; i++)
20561 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20562 mask = expand_simple_binop (maskmode, AND, mask, vt,
20563 NULL_RTX, 0, OPTAB_DIRECT);
20565 /* For non-QImode operations, convert the word permutation control
20566 into a byte permutation control. */
20567 if (mode != V16QImode)
20569 mask = expand_simple_binop (maskmode, ASHIFT, mask,
20570 GEN_INT (exact_log2 (e)),
20571 NULL_RTX, 0, OPTAB_DIRECT);
20573 /* Convert mask to vector of chars. */
20574 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
20576 /* Replicate each of the input bytes into byte positions:
20577 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20578 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20579 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20580 for (i = 0; i < 16; ++i)
20581 vec[i] = GEN_INT (i/e * e);
20582 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20583 vt = force_const_mem (V16QImode, vt);
20585 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
20587 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
20589 /* Convert it into the byte positions by doing
20590 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20591 for (i = 0; i < 16; ++i)
20592 vec[i] = GEN_INT (i % e);
20593 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20594 vt = force_const_mem (V16QImode, vt);
20595 emit_insn (gen_addv16qi3 (mask, mask, vt));
20598 /* The actual shuffle operations all operate on V16QImode. */
20599 op0 = gen_lowpart (V16QImode, op0);
20600 op1 = gen_lowpart (V16QImode, op1);
20601 target = gen_lowpart (V16QImode, target);
20605 emit_insn (gen_xop_pperm (target, op0, op1, mask));
20607 else if (one_operand_shuffle)
20609 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
20616 /* Shuffle the two input vectors independently. */
20617 t1 = gen_reg_rtx (V16QImode);
20618 t2 = gen_reg_rtx (V16QImode);
20619 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
20620 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
20623 /* Then merge them together. The key is whether any given control
20624 element contained a bit set that indicates the second word. */
20625 mask = operands[3];
20627 if (maskmode == V2DImode && !TARGET_SSE4_1)
20629 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20630 more shuffle to convert the V2DI input mask into a V4SI
20631 input mask. At which point the masking that expand_int_vcond
20632 performs will work as desired. */
20633 rtx t3 = gen_reg_rtx (V4SImode);
20634 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
20635 const0_rtx, const0_rtx,
20636 const2_rtx, const2_rtx));
20638 maskmode = V4SImode;
20642 for (i = 0; i < w; i++)
20644 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20645 vt = force_reg (maskmode, vt);
20646 mask = expand_simple_binop (maskmode, AND, mask, vt,
20647 NULL_RTX, 0, OPTAB_DIRECT);
20649 xops[0] = gen_lowpart (mode, operands[0]);
20650 xops[1] = gen_lowpart (mode, t2);
20651 xops[2] = gen_lowpart (mode, t1);
20652 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
20655 ok = ix86_expand_int_vcond (xops);
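/* Editor's illustrative sketch, not part of the original file: the
   arithmetic behind converting a word-level permutation W (one index
   per E-byte element, 16-byte vectors) into the byte-level PSHUFB
   control built above with a shift, a replicating pshufb and an add.  */

static void
word_perm_to_byte_perm (const unsigned char *w, unsigned char *b, int e)
{
  int i;

  /* Byte i selects byte i % e of source element w[i / e].  */
  for (i = 0; i < 16; ++i)
    b[i] = (unsigned char) (w[i / e] * e + i % e);
}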
20660 /* Unpack SRC into the next wider integer vector type, DEST. UNSIGNED_P is
20661 true if we should do zero extension, else sign extension. HIGH_P is
20662 true if we want the N/2 high elements, else the low elements. */
20665 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
20667 enum machine_mode imode = GET_MODE (src);
20672 rtx (*unpack)(rtx, rtx);
20673 rtx (*extract)(rtx, rtx) = NULL;
20674 enum machine_mode halfmode = BLKmode;
20680 unpack = gen_avx2_zero_extendv16qiv16hi2;
20682 unpack = gen_avx2_sign_extendv16qiv16hi2;
20683 halfmode = V16QImode;
20685 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
20689 unpack = gen_avx2_zero_extendv8hiv8si2;
20691 unpack = gen_avx2_sign_extendv8hiv8si2;
20692 halfmode = V8HImode;
20694 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
20698 unpack = gen_avx2_zero_extendv4siv4di2;
20700 unpack = gen_avx2_sign_extendv4siv4di2;
20701 halfmode = V4SImode;
20703 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
20707 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20709 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20713 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20715 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20719 unpack = gen_sse4_1_zero_extendv2siv2di2;
20721 unpack = gen_sse4_1_sign_extendv2siv2di2;
20724 gcc_unreachable ();
20727 if (GET_MODE_SIZE (imode) == 32)
20729 tmp = gen_reg_rtx (halfmode);
20730 emit_insn (extract (tmp, src));
20734 /* Shift higher 8 bytes to lower 8 bytes. */
20735 tmp = gen_reg_rtx (imode);
20736 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20737 gen_lowpart (V1TImode, src),
20743 emit_insn (unpack (dest, tmp));
20747 rtx (*unpack)(rtx, rtx, rtx);
20753 unpack = gen_vec_interleave_highv16qi;
20755 unpack = gen_vec_interleave_lowv16qi;
20759 unpack = gen_vec_interleave_highv8hi;
20761 unpack = gen_vec_interleave_lowv8hi;
20765 unpack = gen_vec_interleave_highv4si;
20767 unpack = gen_vec_interleave_lowv4si;
20770 gcc_unreachable ();
20774 tmp = force_reg (imode, CONST0_RTX (imode));
20776 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
20777 src, pc_rtx, pc_rtx);
20779 emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
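/* Editor's illustrative sketch, not part of the original file: the
   interleave-based widening used on the pre-SSE4.1 path above.  The
   upper half of each widened lane is zero for zero extension, or a
   copy of the lane's sign mask for sign extension.  */

static short
widen_byte (signed char x, int unsigned_p)
{
  unsigned char lo = (unsigned char) x;
  unsigned char hi = unsigned_p ? 0 : (x < 0 ? 0xff : 0);
  return (short) (lo | (hi << 8));
}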
20783 /* Expand conditional increment or decrement using adc/sbb instructions.
20784 The default case using setcc followed by the conditional move can be
20785 done by generic code. */
20787 ix86_expand_int_addcc (rtx operands[])
20789 enum rtx_code code = GET_CODE (operands[1]);
20791 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20793 rtx val = const0_rtx;
20794 bool fpcmp = false;
20795 enum machine_mode mode;
20796 rtx op0 = XEXP (operands[1], 0);
20797 rtx op1 = XEXP (operands[1], 1);
20799 if (operands[3] != const1_rtx
20800 && operands[3] != constm1_rtx)
20802 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20804 code = GET_CODE (compare_op);
20806 flags = XEXP (compare_op, 0);
20808 if (GET_MODE (flags) == CCFPmode
20809 || GET_MODE (flags) == CCFPUmode)
20812 code = ix86_fp_compare_code_to_integer (code);
20819 PUT_CODE (compare_op,
20820 reverse_condition_maybe_unordered
20821 (GET_CODE (compare_op)));
20823 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
20826 mode = GET_MODE (operands[0]);
20828 /* Construct either adc or sbb insn. */
20829 if ((code == LTU) == (operands[3] == constm1_rtx))
20834 insn = gen_subqi3_carry;
20837 insn = gen_subhi3_carry;
20840 insn = gen_subsi3_carry;
20843 insn = gen_subdi3_carry;
20846 gcc_unreachable ();
20854 insn = gen_addqi3_carry;
20857 insn = gen_addhi3_carry;
20860 insn = gen_addsi3_carry;
20863 insn = gen_adddi3_carry;
20866 gcc_unreachable ();
20869 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
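/* Editor's illustrative sketch, not part of the original file: the
   branch-free idiom recognized above.  After a compare that leaves the
   condition in the carry flag, adc/sbb fold it into the addition.  */

static unsigned int
cond_inc_via_carry (unsigned int x, unsigned int a, unsigned int b)
{
  /* Becomes cmp a, b; adc $0, x -- no setcc and no conditional move.  */
  return x + (a < b);
}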
20875 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20876 but works for floating point parameters and non-offsettable memories.
20877 For pushes, it returns just stack offsets; the values will be saved
20878 in the right order. At most four parts are generated. */
20881 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
20886 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
20888 size = (GET_MODE_SIZE (mode) + 4) / 8;
20890 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
20891 gcc_assert (size >= 2 && size <= 4);
20893 /* Optimize constant pool reference to immediates. This is used by fp
20894 moves that force all constants to memory to allow combining. */
20895 if (MEM_P (operand) && MEM_READONLY_P (operand))
20897 rtx tmp = maybe_get_pool_constant (operand);
20902 if (MEM_P (operand) && !offsettable_memref_p (operand))
20904 /* The only non-offsettable memories we handle are pushes. */
20905 int ok = push_operand (operand, VOIDmode);
20909 operand = copy_rtx (operand);
20910 PUT_MODE (operand, word_mode);
20911 parts[0] = parts[1] = parts[2] = parts[3] = operand;
20915 if (GET_CODE (operand) == CONST_VECTOR)
20917 enum machine_mode imode = int_mode_for_mode (mode);
20918 /* Caution: if we looked through a constant pool memory above,
20919 the operand may actually have a different mode now. That's
20920 ok, since we want to pun this all the way back to an integer. */
20921 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
20922 gcc_assert (operand != NULL);
20928 if (mode == DImode)
20929 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20934 if (REG_P (operand))
20936 gcc_assert (reload_completed);
20937 for (i = 0; i < size; i++)
20938 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
20940 else if (offsettable_memref_p (operand))
20942 operand = adjust_address (operand, SImode, 0);
20943 parts[0] = operand;
20944 for (i = 1; i < size; i++)
20945 parts[i] = adjust_address (operand, SImode, 4 * i);
20947 else if (GET_CODE (operand) == CONST_DOUBLE)
20952 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20956 real_to_target (l, &r, mode);
20957 parts[3] = gen_int_mode (l[3], SImode);
20958 parts[2] = gen_int_mode (l[2], SImode);
20961 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
20962 long double may not be 80-bit. */
20963 real_to_target (l, &r, mode);
20964 parts[2] = gen_int_mode (l[2], SImode);
20967 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
20970 gcc_unreachable ();
20972 parts[1] = gen_int_mode (l[1], SImode);
20973 parts[0] = gen_int_mode (l[0], SImode);
20976 gcc_unreachable ();
20981 if (mode == TImode)
20982 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20983 if (mode == XFmode || mode == TFmode)
20985 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
20986 if (REG_P (operand))
20988 gcc_assert (reload_completed);
20989 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
20990 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
20992 else if (offsettable_memref_p (operand))
20994 operand = adjust_address (operand, DImode, 0);
20995 parts[0] = operand;
20996 parts[1] = adjust_address (operand, upper_mode, 8);
20998 else if (GET_CODE (operand) == CONST_DOUBLE)
21003 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
21004 real_to_target (l, &r, mode);
21006 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21007 if (HOST_BITS_PER_WIDE_INT >= 64)
21010 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
21011 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
21014 parts[0] = immed_double_const (l[0], l[1], DImode);
21016 if (upper_mode == SImode)
21017 parts[1] = gen_int_mode (l[2], SImode);
21018 else if (HOST_BITS_PER_WIDE_INT >= 64)
21021 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
21022 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
21025 parts[1] = immed_double_const (l[2], l[3], DImode);
21028 gcc_unreachable ();
21035 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21036 Return false when normal moves are needed; true when all required
21037 insns have been emitted. Operands 2-4 contain the input values
21038 in the correct order; operands 5-7 contain the output values. */
21041 ix86_split_long_move (rtx operands[])
21046 int collisions = 0;
21047 enum machine_mode mode = GET_MODE (operands[0]);
21048 bool collisionparts[4];
21050 /* The DFmode expanders may ask us to move a double.
21051 For a 64bit target this is a single move. By hiding the fact
21052 here we simplify the i386.md splitters. */
21053 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
21055 /* Optimize constant pool reference to immediates. This is used by
21056 fp moves that force all constants to memory to allow combining. */
21058 if (MEM_P (operands[1])
21059 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
21060 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
21061 operands[1] = get_pool_constant (XEXP (operands[1], 0));
21062 if (push_operand (operands[0], VOIDmode))
21064 operands[0] = copy_rtx (operands[0]);
21065 PUT_MODE (operands[0], word_mode);
21068 operands[0] = gen_lowpart (DImode, operands[0]);
21069 operands[1] = gen_lowpart (DImode, operands[1]);
21070 emit_move_insn (operands[0], operands[1]);
21074 /* The only non-offsettable memory we handle is a push. */
21075 if (push_operand (operands[0], VOIDmode))
21078 gcc_assert (!MEM_P (operands[0])
21079 || offsettable_memref_p (operands[0]));
21081 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
21082 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
21084 /* When emitting a push, take care of source operands on the stack. */
21085 if (push && MEM_P (operands[1])
21086 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
21088 rtx src_base = XEXP (part[1][nparts - 1], 0);
21090 /* Compensate for the stack decrement by 4. */
21091 if (!TARGET_64BIT && nparts == 3
21092 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
21093 src_base = plus_constant (Pmode, src_base, 4);
21095 /* src_base refers to the stack pointer and is
21096 automatically decreased by the emitted pushes. */
21097 for (i = 0; i < nparts; i++)
21098 part[1][i] = change_address (part[1][i],
21099 GET_MODE (part[1][i]), src_base);
21102 /* We need to do the copy in the right order in case an address register
21103 of the source overlaps the destination. */
21104 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
21108 for (i = 0; i < nparts; i++)
21111 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
21112 if (collisionparts[i])
21116 /* Collision in the middle part can be handled by reordering. */
21117 if (collisions == 1 && nparts == 3 && collisionparts [1])
21119 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
21120 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
21122 else if (collisions == 1
21124 && (collisionparts [1] || collisionparts [2]))
21126 if (collisionparts [1])
21128 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
21129 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
21133 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
21134 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
21138 /* If there are more collisions, we can't handle it by reordering.
21139 Do an lea to the last part and use only one colliding move. */
21140 else if (collisions > 1)
21146 base = part[0][nparts - 1];
21148 /* Handle the case when the last part isn't valid for lea.
21149 Happens in 64-bit mode storing the 12-byte XFmode. */
21150 if (GET_MODE (base) != Pmode)
21151 base = gen_rtx_REG (Pmode, REGNO (base));
21153 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
21154 part[1][0] = replace_equiv_address (part[1][0], base);
21155 for (i = 1; i < nparts; i++)
21157 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
21158 part[1][i] = replace_equiv_address (part[1][i], tmp);
21169 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
21170 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
21171 stack_pointer_rtx, GEN_INT (-4)));
21172 emit_move_insn (part[0][2], part[1][2]);
21174 else if (nparts == 4)
21176 emit_move_insn (part[0][3], part[1][3]);
21177 emit_move_insn (part[0][2], part[1][2]);
21182 /* In 64bit mode we don't have a 32bit push available. In case this is a
21183 register, it is OK - we will just use the larger counterpart. We also
21184 retype memory - these come from an attempt to avoid a REX prefix on
21185 moves of the second half of a TFmode value. */
21186 if (GET_MODE (part[1][1]) == SImode)
21188 switch (GET_CODE (part[1][1]))
21191 part[1][1] = adjust_address (part[1][1], DImode, 0);
21195 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
21199 gcc_unreachable ();
21202 if (GET_MODE (part[1][0]) == SImode)
21203 part[1][0] = part[1][1];
21206 emit_move_insn (part[0][1], part[1][1]);
21207 emit_move_insn (part[0][0], part[1][0]);
21211 /* Choose the correct order so as not to overwrite the source before it is copied. */
21212 if ((REG_P (part[0][0])
21213 && REG_P (part[1][1])
21214 && (REGNO (part[0][0]) == REGNO (part[1][1])
21216 && REGNO (part[0][0]) == REGNO (part[1][2]))
21218 && REGNO (part[0][0]) == REGNO (part[1][3]))))
21220 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
21222 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
21224 operands[2 + i] = part[0][j];
21225 operands[6 + i] = part[1][j];
21230 for (i = 0; i < nparts; i++)
21232 operands[2 + i] = part[0][i];
21233 operands[6 + i] = part[1][i];
21237 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21238 if (optimize_insn_for_size_p ())
21240 for (j = 0; j < nparts - 1; j++)
21241 if (CONST_INT_P (operands[6 + j])
21242 && operands[6 + j] != const0_rtx
21243 && REG_P (operands[2 + j]))
21244 for (i = j; i < nparts - 1; i++)
21245 if (CONST_INT_P (operands[7 + i])
21246 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
21247 operands[7 + i] = operands[2 + j];
21250 for (i = 0; i < nparts; i++)
21251 emit_move_insn (operands[2 + i], operands[6 + i]);
21256 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21257 left shift by a constant, either using a single shift or
21258 a sequence of add instructions. */
21261 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
21263 rtx (*insn)(rtx, rtx, rtx);
21266 || (count * ix86_cost->add <= ix86_cost->shift_const
21267 && !optimize_insn_for_size_p ()))
21269 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
21270 while (count-- > 0)
21271 emit_insn (insn (operand, operand, operand));
21275 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
21276 emit_insn (insn (operand, operand, GEN_INT (count)));
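/* Editor's illustrative sketch, not part of the original file: the
   add-sequence alternative chosen above when COUNT additions are
   cheaper than a single shift by a constant.  */

static unsigned int
shl2_via_adds (unsigned int x)
{
  x += x;			/* x << 1 */
  x += x;			/* x << 2 */
  return x;
}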
21281 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
21283 rtx (*gen_ashl3)(rtx, rtx, rtx);
21284 rtx (*gen_shld)(rtx, rtx, rtx);
21285 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21287 rtx low[2], high[2];
21290 if (CONST_INT_P (operands[2]))
21292 split_double_mode (mode, operands, 2, low, high);
21293 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21295 if (count >= half_width)
21297 emit_move_insn (high[0], low[1]);
21298 emit_move_insn (low[0], const0_rtx);
21300 if (count > half_width)
21301 ix86_expand_ashl_const (high[0], count - half_width, mode);
21305 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
21307 if (!rtx_equal_p (operands[0], operands[1]))
21308 emit_move_insn (operands[0], operands[1]);
21310 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
21311 ix86_expand_ashl_const (low[0], count, mode);
21316 split_double_mode (mode, operands, 1, low, high);
21318 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
21320 if (operands[1] == const1_rtx)
21322 /* Assuming we've chosen QImode-capable registers, then 1 << N
21323 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21324 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
21326 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
21328 ix86_expand_clear (low[0]);
21329 ix86_expand_clear (high[0]);
21330 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
21332 d = gen_lowpart (QImode, low[0]);
21333 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
21334 s = gen_rtx_EQ (QImode, flags, const0_rtx);
21335 emit_insn (gen_rtx_SET (VOIDmode, d, s));
21337 d = gen_lowpart (QImode, high[0]);
21338 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
21339 s = gen_rtx_NE (QImode, flags, const0_rtx);
21340 emit_insn (gen_rtx_SET (VOIDmode, d, s));
21343 /* Otherwise, we can get the same results by manually performing
21344 a bit extract operation on bit 5/6, and then performing the two
21345 shifts. The two methods of getting 0/1 into low/high are exactly
21346 the same size. Avoiding the shift in the bit extract case helps
21347 pentium4 a bit; no one else seems to care much either way. */
21350 enum machine_mode half_mode;
21351 rtx (*gen_lshr3)(rtx, rtx, rtx);
21352 rtx (*gen_and3)(rtx, rtx, rtx);
21353 rtx (*gen_xor3)(rtx, rtx, rtx);
21354 HOST_WIDE_INT bits;
21357 if (mode == DImode)
21359 half_mode = SImode;
21360 gen_lshr3 = gen_lshrsi3;
21361 gen_and3 = gen_andsi3;
21362 gen_xor3 = gen_xorsi3;
21367 half_mode = DImode;
21368 gen_lshr3 = gen_lshrdi3;
21369 gen_and3 = gen_anddi3;
21370 gen_xor3 = gen_xordi3;
21374 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
21375 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
21377 x = gen_lowpart (half_mode, operands[2]);
21378 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
21380 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
21381 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
21382 emit_move_insn (low[0], high[0]);
21383 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
21386 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
21387 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
21391 if (operands[1] == constm1_rtx)
21393 /* For -1 << N, we can avoid the shld instruction, because we
21394 know that we're shifting 0...31/63 ones into a -1. */
21395 emit_move_insn (low[0], constm1_rtx);
21396 if (optimize_insn_for_size_p ())
21397 emit_move_insn (high[0], low[0]);
21399 emit_move_insn (high[0], constm1_rtx);
21403 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
21405 if (!rtx_equal_p (operands[0], operands[1]))
21406 emit_move_insn (operands[0], operands[1]);
21408 split_double_mode (mode, operands, 1, low, high);
21409 emit_insn (gen_shld (high[0], low[0], operands[2]));
21412 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
21414 if (TARGET_CMOVE && scratch)
21416 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21417 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21419 ix86_expand_clear (scratch);
21420 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
21424 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21425 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21427 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
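/* Editor's illustrative sketch, not part of the original file: the
   constant double-word left shift emitted above for 0 < COUNT <
   half_width, using shld for the high half and a plain shift for the
   low half.  */

static void
dshl_const (unsigned int *lo, unsigned int *hi, int count)
{
  *hi = (*hi << count) | (*lo >> (32 - count));	/* shld */
  *lo <<= count;				/* shl */
}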
21432 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
21434 rtx (*gen_ashr3)(rtx, rtx, rtx)
21435 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
21436 rtx (*gen_shrd)(rtx, rtx, rtx);
21437 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21439 rtx low[2], high[2];
21442 if (CONST_INT_P (operands[2]))
21444 split_double_mode (mode, operands, 2, low, high);
21445 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21447 if (count == GET_MODE_BITSIZE (mode) - 1)
21449 emit_move_insn (high[0], high[1]);
21450 emit_insn (gen_ashr3 (high[0], high[0],
21451 GEN_INT (half_width - 1)));
21452 emit_move_insn (low[0], high[0]);
21455 else if (count >= half_width)
21457 emit_move_insn (low[0], high[1]);
21458 emit_move_insn (high[0], low[0]);
21459 emit_insn (gen_ashr3 (high[0], high[0],
21460 GEN_INT (half_width - 1)));
21462 if (count > half_width)
21463 emit_insn (gen_ashr3 (low[0], low[0],
21464 GEN_INT (count - half_width)));
21468 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21470 if (!rtx_equal_p (operands[0], operands[1]))
21471 emit_move_insn (operands[0], operands[1]);
21473 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21474 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
21479 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21481 if (!rtx_equal_p (operands[0], operands[1]))
21482 emit_move_insn (operands[0], operands[1]);
21484 split_double_mode (mode, operands, 1, low, high);
21486 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21487 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
21489 if (TARGET_CMOVE && scratch)
21491 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21492 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21494 emit_move_insn (scratch, high[0]);
21495 emit_insn (gen_ashr3 (scratch, scratch,
21496 GEN_INT (half_width - 1)));
21497 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21502 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
21503 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
21505 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
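/* Editor's illustrative sketch, not part of the original file: the
   COUNT >= half_width case of the arithmetic right shift split above;
   the high half collapses to a copy of the sign bit.  Arithmetic
   behavior of >> on negative values is assumed, as elsewhere.  */

static void
dashr_big (int *lo, int *hi, int count)		/* 32 <= count < 64 */
{
  *lo = *hi >> (count - 32);
  *hi >>= 31;			/* replicate the sign bit */
}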
21511 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
21513 rtx (*gen_lshr3)(rtx, rtx, rtx)
21514 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
21515 rtx (*gen_shrd)(rtx, rtx, rtx);
21516 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21518 rtx low[2], high[2];
21521 if (CONST_INT_P (operands[2]))
21523 split_double_mode (mode, operands, 2, low, high);
21524 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21526 if (count >= half_width)
21528 emit_move_insn (low[0], high[1]);
21529 ix86_expand_clear (high[0]);
21531 if (count > half_width)
21532 emit_insn (gen_lshr3 (low[0], low[0],
21533 GEN_INT (count - half_width)));
21537 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21539 if (!rtx_equal_p (operands[0], operands[1]))
21540 emit_move_insn (operands[0], operands[1]);
21542 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21543 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
21548 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21550 if (!rtx_equal_p (operands[0], operands[1]))
21551 emit_move_insn (operands[0], operands[1]);
21553 split_double_mode (mode, operands, 1, low, high);
21555 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21556 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
21558 if (TARGET_CMOVE && scratch)
21560 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21561 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21563 ix86_expand_clear (scratch);
21564 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21569 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21570 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21572 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
21577 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
21579 predict_jump (int prob)
21581 rtx insn = get_last_insn ();
21582 gcc_assert (JUMP_P (insn));
21583 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
21586 /* Helper function for the string operations below. Test whether VARIABLE
21587 is aligned to VALUE bytes. If true, jump to the label. */
21589 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
21591 rtx label = gen_label_rtx ();
21592 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
21593 if (GET_MODE (variable) == DImode)
21594 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
21596 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
21597 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
21600 predict_jump (REG_BR_PROB_BASE * 50 / 100);
21602 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21606 /* Adjust COUNTREG by the VALUE. */
21608 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
21610 rtx (*gen_add)(rtx, rtx, rtx)
21611 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
21613 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
21616 /* Zero extend the possibly SImode EXP to a Pmode register. */
21618 ix86_zero_extend_to_Pmode (rtx exp)
21620 if (GET_MODE (exp) != Pmode)
21621 exp = convert_to_mode (Pmode, exp, 1);
21622 return force_reg (Pmode, exp);
21625 /* Divide COUNTREG by SCALE. */
21627 scale_counter (rtx countreg, int scale)
21633 if (CONST_INT_P (countreg))
21634 return GEN_INT (INTVAL (countreg) / scale);
21635 gcc_assert (REG_P (countreg));
21637 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
21638 GEN_INT (exact_log2 (scale)),
21639 NULL, 1, OPTAB_DIRECT);
21643 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21644 DImode for constant loop counts. */
21646 static enum machine_mode
21647 counter_mode (rtx count_exp)
21649 if (GET_MODE (count_exp) != VOIDmode)
21650 return GET_MODE (count_exp);
21651 if (!CONST_INT_P (count_exp))
21653 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
21658 /* When SRCPTR is non-NULL, output a simple loop to move memory
21659 pointed to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
21660 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
21661 equivalent loop to set memory by VALUE (assumed to be in MODE).
21663 The size is rounded down to a whole number of chunks moved at once.
21664 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
21668 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
21669 rtx destptr, rtx srcptr, rtx value,
21670 rtx count, enum machine_mode mode, int unroll,
21673 rtx out_label, top_label, iter, tmp;
21674 enum machine_mode iter_mode = counter_mode (count);
21675 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
21676 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
21682 top_label = gen_label_rtx ();
21683 out_label = gen_label_rtx ();
21684 iter = gen_reg_rtx (iter_mode);
21686 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
21687 NULL, 1, OPTAB_DIRECT);
21688 /* Those two should combine. */
21689 if (piece_size == const1_rtx)
21691 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
21693 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21695 emit_move_insn (iter, const0_rtx);
21697 emit_label (top_label);
21699 tmp = convert_modes (Pmode, iter_mode, iter, true);
21700 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
21701 destmem = change_address (destmem, mode, x_addr);
21705 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
21706 srcmem = change_address (srcmem, mode, y_addr);
21708 /* When unrolling for chips that reorder memory reads and writes,
21709 we can save registers by using a single temporary.
21710 Also, using 4 temporaries is overkill in 32bit mode. */
21711 if (!TARGET_64BIT && 0)
21713 for (i = 0; i < unroll; i++)
21718 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21720 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21722 emit_move_insn (destmem, srcmem);
21728 gcc_assert (unroll <= 4);
21729 for (i = 0; i < unroll; i++)
21731 tmpreg[i] = gen_reg_rtx (mode);
21735 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21737 emit_move_insn (tmpreg[i], srcmem);
21739 for (i = 0; i < unroll; i++)
21744 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21746 emit_move_insn (destmem, tmpreg[i]);
21751 for (i = 0; i < unroll; i++)
21755 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21756 emit_move_insn (destmem, value);
21759 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
21760 true, OPTAB_LIB_WIDEN);
21762 emit_move_insn (iter, tmp);
21764 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
21766 if (expected_size != -1)
21768 expected_size /= GET_MODE_SIZE (mode) * unroll;
21769 if (expected_size == 0)
21771 else if (expected_size > REG_BR_PROB_BASE)
21772 predict_jump (REG_BR_PROB_BASE - 1);
21774 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
21777 predict_jump (REG_BR_PROB_BASE * 80 / 100);
21778 iter = ix86_zero_extend_to_Pmode (iter);
21779 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
21780 true, OPTAB_LIB_WIDEN);
21781 if (tmp != destptr)
21782 emit_move_insn (destptr, tmp);
21785 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
21786 true, OPTAB_LIB_WIDEN);
21788 emit_move_insn (srcptr, tmp);
21790 emit_label (out_label);
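/* Editor's illustrative sketch, not part of the original file: the
   shape of the loop emitted above for a CHUNK-byte mode unrolled
   UNROLL times; CHUNK * UNROLL is assumed to be a power of two, and
   the tail below the rounded size is left to the epilogue code.  */

static void
copy_loop_shape (char *dest, const char *src, unsigned long count,
		 int chunk, int unroll)
{
  unsigned long piece = (unsigned long) chunk * unroll;
  unsigned long size = count & ~(piece - 1);
  unsigned long iter;
  int i, j;

  for (iter = 0; iter < size; iter += piece)
    for (i = 0; i < unroll; i++)
      for (j = 0; j < chunk; j++)
	dest[iter + i * chunk + j] = src[iter + i * chunk + j];
}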
21793 /* Output "rep; mov" instruction.
21794 Arguments have the same meaning as for the previous function. */
21796 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
21797 rtx destptr, rtx srcptr,
21799 enum machine_mode mode)
21804 HOST_WIDE_INT rounded_count;
21806 /* If the size is known, it is shorter to use rep movs. */
21807 if (mode == QImode && CONST_INT_P (count)
21808 && !(INTVAL (count) & 3))
21809 mode = SImode;
21811 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21812 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21813 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
21814 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
21815 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21816 if (mode != QImode)
21818 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21819 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21820 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21821 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
21822 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21823 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
21827 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21828 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
21830 if (CONST_INT_P (count))
21832 rounded_count = (INTVAL (count)
21833 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21834 destmem = shallow_copy_rtx (destmem);
21835 srcmem = shallow_copy_rtx (srcmem);
21836 set_mem_size (destmem, rounded_count);
21837 set_mem_size (srcmem, rounded_count);
21841 if (MEM_SIZE_KNOWN_P (destmem))
21842 clear_mem_size (destmem);
21843 if (MEM_SIZE_KNOWN_P (srcmem))
21844 clear_mem_size (srcmem);
21846 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
21850 /* Output "rep; stos" instruction.
21851 Arguments have the same meaning as for the previous function. */
21853 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
21854 rtx count, enum machine_mode mode,
21859 HOST_WIDE_INT rounded_count;
21861 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21862 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21863 value = force_reg (mode, gen_lowpart (mode, value));
21864 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21865 if (mode != QImode)
21867 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21868 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21869 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21872 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21873 if (orig_value == const0_rtx && CONST_INT_P (count))
21875 rounded_count = (INTVAL (count)
21876 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21877 destmem = shallow_copy_rtx (destmem);
21878 set_mem_size (destmem, rounded_count);
21880 else if (MEM_SIZE_KNOWN_P (destmem))
21881 clear_mem_size (destmem);
21882 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
21886 emit_strmov (rtx destmem, rtx srcmem,
21887 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
21889 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
21890 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
21891 emit_insn (gen_strmov (destptr, dest, srcptr, src));
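/* Editor's illustrative sketch, not part of the original file: the
   constant-count strategy of the epilogue below -- test each low bit
   of COUNT, largest piece first, and copy that many bytes at the
   running offset.  */

static void
epilogue_shape (char *dest, const char *src, unsigned long count)
{
  unsigned long off = 0;
  int i, size;

  for (size = 8; size >= 1; size >>= 1)
    if (count & size)
      {
	for (i = 0; i < size; i++)
	  dest[off + i] = src[off + i];
	off += size;
      }
}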
21894 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21896 expand_movmem_epilogue (rtx destmem, rtx srcmem,
21897 rtx destptr, rtx srcptr, rtx count, int max_size)
21900 if (CONST_INT_P (count))
21902 HOST_WIDE_INT countval = INTVAL (count);
21905 if ((countval & 0x10) && max_size > 16)
21909 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21910 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
21913 gcc_unreachable ();
21916 if ((countval & 0x08) && max_size > 8)
21919 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21922 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21923 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
21927 if ((countval & 0x04) && max_size > 4)
21929 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21932 if ((countval & 0x02) && max_size > 2)
21934 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
21937 if ((countval & 0x01) && max_size > 1)
21939 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
21946 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
21947 count, 1, OPTAB_DIRECT);
21948 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
21949 count, QImode, 1, 4);
21953 /* When there are stringops, we can cheaply increase dest and src pointers.
21954 Otherwise we save code size by maintaining offset (zero is readily
21955 available from the preceding rep operation) and using x86 addressing modes.
21956 */
21957 if (TARGET_SINGLE_STRINGOP)
21961 rtx label = ix86_expand_aligntest (count, 4, true);
21962 src = change_address (srcmem, SImode, srcptr);
21963 dest = change_address (destmem, SImode, destptr);
21964 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21965 emit_label (label);
21966 LABEL_NUSES (label) = 1;
21970 rtx label = ix86_expand_aligntest (count, 2, true);
21971 src = change_address (srcmem, HImode, srcptr);
21972 dest = change_address (destmem, HImode, destptr);
21973 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21974 emit_label (label);
21975 LABEL_NUSES (label) = 1;
21979 rtx label = ix86_expand_aligntest (count, 1, true);
21980 src = change_address (srcmem, QImode, srcptr);
21981 dest = change_address (destmem, QImode, destptr);
21982 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21983 emit_label (label);
21984 LABEL_NUSES (label) = 1;
21989 rtx offset = force_reg (Pmode, const0_rtx);
21994 rtx label = ix86_expand_aligntest (count, 4, true);
21995 src = change_address (srcmem, SImode, srcptr);
21996 dest = change_address (destmem, SImode, destptr);
21997 emit_move_insn (dest, src);
21998 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
21999 true, OPTAB_LIB_WIDEN);
22001 emit_move_insn (offset, tmp);
22002 emit_label (label);
22003 LABEL_NUSES (label) = 1;
22007 rtx label = ix86_expand_aligntest (count, 2, true);
22008 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
22009 src = change_address (srcmem, HImode, tmp);
22010 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
22011 dest = change_address (destmem, HImode, tmp);
22012 emit_move_insn (dest, src);
22013 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
22014 true, OPTAB_LIB_WIDEN);
22016 emit_move_insn (offset, tmp);
22017 emit_label (label);
22018 LABEL_NUSES (label) = 1;
22022 rtx label = ix86_expand_aligntest (count, 1, true);
22023 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
22024 src = change_address (srcmem, QImode, tmp);
22025 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
22026 dest = change_address (destmem, QImode, tmp);
22027 emit_move_insn (dest, src);
22028 emit_label (label);
22029 LABEL_NUSES (label) = 1;
22034 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
22036 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
22037 rtx count, int max_size)
22040 expand_simple_binop (counter_mode (count), AND, count,
22041 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
22042 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
22043 gen_lowpart (QImode, value), count, QImode,
22047 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
22049 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
22053 if (CONST_INT_P (count))
22055 HOST_WIDE_INT countval = INTVAL (count);
22058 if ((countval & 0x10) && max_size > 16)
22062 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
22063 emit_insn (gen_strset (destptr, dest, value));
22064 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
22065 emit_insn (gen_strset (destptr, dest, value));
22068 gcc_unreachable ();
22071 if ((countval & 0x08) && max_size > 8)
22075 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
22076 emit_insn (gen_strset (destptr, dest, value));
22080 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
22081 emit_insn (gen_strset (destptr, dest, value));
22082 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
22083 emit_insn (gen_strset (destptr, dest, value));
22087 if ((countval & 0x04) && max_size > 4)
22089 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
22090 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
22093 if ((countval & 0x02) && max_size > 2)
22095 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
22096 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
22099 if ((countval & 0x01) && max_size > 1)
22101 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
22102 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
22109 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
22114 rtx label = ix86_expand_aligntest (count, 16, true);
22117 dest = change_address (destmem, DImode, destptr);
22118 emit_insn (gen_strset (destptr, dest, value));
22119 emit_insn (gen_strset (destptr, dest, value));
22123 dest = change_address (destmem, SImode, destptr);
22124 emit_insn (gen_strset (destptr, dest, value));
22125 emit_insn (gen_strset (destptr, dest, value));
22126 emit_insn (gen_strset (destptr, dest, value));
22127 emit_insn (gen_strset (destptr, dest, value));
22129 emit_label (label);
22130 LABEL_NUSES (label) = 1;
22134 rtx label = ix86_expand_aligntest (count, 8, true);
22137 dest = change_address (destmem, DImode, destptr);
22138 emit_insn (gen_strset (destptr, dest, value));
22142 dest = change_address (destmem, SImode, destptr);
22143 emit_insn (gen_strset (destptr, dest, value));
22144 emit_insn (gen_strset (destptr, dest, value));
22146 emit_label (label);
22147 LABEL_NUSES (label) = 1;
22151 rtx label = ix86_expand_aligntest (count, 4, true);
22152 dest = change_address (destmem, SImode, destptr);
22153 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
22154 emit_label (label);
22155 LABEL_NUSES (label) = 1;
22159 rtx label = ix86_expand_aligntest (count, 2, true);
22160 dest = change_address (destmem, HImode, destptr);
22161 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
22162 emit_label (label);
22163 LABEL_NUSES (label) = 1;
22167 rtx label = ix86_expand_aligntest (count, 1, true);
22168 dest = change_address (destmem, QImode, destptr);
22169 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
22170 emit_label (label);
22171 LABEL_NUSES (label) = 1;
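/* Illustrative walk-through: each ix86_expand_aligntest above tests one
   bit of the residual count and jumps over the matching store when that
   bit is clear.  For a residual count of 11 (8 + 2 + 1) the 8-, 2- and
   1-byte stores execute, while the 16- and 4-byte stores are jumped
   over.  */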
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
if (align <= 1 && desired_alignment > 1)
  rtx label = ix86_expand_aligntest (destptr, 1, false);
  srcmem = change_address (srcmem, QImode, srcptr);
  destmem = change_address (destmem, QImode, destptr);
  emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
  ix86_adjust_counter (count, 1);
  emit_label (label);
  LABEL_NUSES (label) = 1;
if (align <= 2 && desired_alignment > 2)
  rtx label = ix86_expand_aligntest (destptr, 2, false);
  srcmem = change_address (srcmem, HImode, srcptr);
  destmem = change_address (destmem, HImode, destptr);
  emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
  ix86_adjust_counter (count, 2);
  emit_label (label);
  LABEL_NUSES (label) = 1;
if (align <= 4 && desired_alignment > 4)
  rtx label = ix86_expand_aligntest (destptr, 4, false);
  srcmem = change_address (srcmem, SImode, srcptr);
  destmem = change_address (destmem, SImode, destptr);
  emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
  ix86_adjust_counter (count, 4);
  emit_label (label);
  LABEL_NUSES (label) = 1;
gcc_assert (desired_alignment <= 8);
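/* Worked example: if DESTPTR is congruent to 3 mod 8 and
   desired_alignment is 8, the tests above first copy one byte (the
   pointer becomes 4 mod 8), skip the two-byte step since bit 1 is now
   clear, then copy four bytes, leaving the destination 8-byte aligned;
   COUNT is adjusted on each taken path.  */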
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
rtx orig_dst = dst;
rtx orig_src = src;
int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
if (src_align_bytes >= 0)
  src_align_bytes = desired_align - src_align_bytes;
if (align_bytes & 1)
  dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
  src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
  emit_insn (gen_strmov (destreg, dst, srcreg, src));
if (align_bytes & 2)
  dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
  src = adjust_automodify_address_nv (src, HImode, srcreg, off);
  if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
    set_mem_align (dst, 2 * BITS_PER_UNIT);
  if (src_align_bytes >= 0
      && (src_align_bytes & 1) == (align_bytes & 1)
      && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
    set_mem_align (src, 2 * BITS_PER_UNIT);
  emit_insn (gen_strmov (destreg, dst, srcreg, src));
if (align_bytes & 4)
  dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
  src = adjust_automodify_address_nv (src, SImode, srcreg, off);
  if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
    set_mem_align (dst, 4 * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    unsigned int src_align = 0;
    if ((src_align_bytes & 3) == (align_bytes & 3))
    else if ((src_align_bytes & 1) == (align_bytes & 1))
    if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
      set_mem_align (src, src_align * BITS_PER_UNIT);
  emit_insn (gen_strmov (destreg, dst, srcreg, src));
dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
  set_mem_align (dst, desired_align * BITS_PER_UNIT);
if (src_align_bytes >= 0)
  unsigned int src_align = 0;
  if ((src_align_bytes & 7) == (align_bytes & 7))
  else if ((src_align_bytes & 3) == (align_bytes & 3))
  else if ((src_align_bytes & 1) == (align_bytes & 1))
  if (src_align > (unsigned int) desired_align)
    src_align = desired_align;
  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
    set_mem_align (src, src_align * BITS_PER_UNIT);
if (MEM_SIZE_KNOWN_P (orig_dst))
  set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
if (MEM_SIZE_KNOWN_P (orig_src))
  set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
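/* Note on the source-alignment bookkeeping above: SRC can only be
   marked as more aligned when its misalignment matches that of DST in
   the bits already consumed.  E.g. if DST starts one byte below 2-byte
   alignment and (src_align_bytes & 1) == (align_bytes & 1), then after
   the one-byte copy both pointers are 2-byte aligned.  */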
/* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
if (align <= 1 && desired_alignment > 1)
  rtx label = ix86_expand_aligntest (destptr, 1, false);
  destmem = change_address (destmem, QImode, destptr);
  emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
  ix86_adjust_counter (count, 1);
  emit_label (label);
  LABEL_NUSES (label) = 1;
if (align <= 2 && desired_alignment > 2)
  rtx label = ix86_expand_aligntest (destptr, 2, false);
  destmem = change_address (destmem, HImode, destptr);
  emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
  ix86_adjust_counter (count, 2);
  emit_label (label);
  LABEL_NUSES (label) = 1;
if (align <= 4 && desired_alignment > 4)
  rtx label = ix86_expand_aligntest (destptr, 4, false);
  destmem = change_address (destmem, SImode, destptr);
  emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
  ix86_adjust_counter (count, 4);
  emit_label (label);
  LABEL_NUSES (label) = 1;
gcc_assert (desired_alignment <= 8);
/* Set enough bytes at DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be stored.  */
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
rtx orig_dst = dst;
if (align_bytes & 1)
  dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
  emit_insn (gen_strset (destreg, dst,
                         gen_lowpart (QImode, value)));
if (align_bytes & 2)
  dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
  if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
    set_mem_align (dst, 2 * BITS_PER_UNIT);
  emit_insn (gen_strset (destreg, dst,
                         gen_lowpart (HImode, value)));
if (align_bytes & 4)
  dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
  if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
    set_mem_align (dst, 4 * BITS_PER_UNIT);
  emit_insn (gen_strset (destreg, dst,
                         gen_lowpart (SImode, value)));
dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
  set_mem_align (dst, desired_align * BITS_PER_UNIT);
if (MEM_SIZE_KNOWN_P (orig_dst))
  set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check)
const struct stringop_algs * algs;
bool optimize_for_speed;
/* Algorithms using the rep prefix want at least edi and ecx;
   additionally, memset wants eax and memcpy wants esi.  Don't
   consider such algorithms if the user has appropriated those
   registers for their own purposes.  */
bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                           ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
#define ALG_USABLE_P(alg) (rep_prefix_usable \
                           || (alg != rep_prefix_1_byte \
                               && alg != rep_prefix_4_byte \
                               && alg != rep_prefix_8_byte))
const struct processor_costs *cost;
/* Even if the string operation call is cold, we still might spend a lot
   of time processing large blocks.  */
if (optimize_function_for_size_p (cfun)
    || (optimize_insn_for_size_p ()
        && expected_size != -1 && expected_size < 256))
  optimize_for_speed = false;
  optimize_for_speed = true;
cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
*dynamic_check = -1;
algs = &cost->memset[TARGET_64BIT != 0];
algs = &cost->memcpy[TARGET_64BIT != 0];
if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
  return ix86_stringop_alg;
/* rep; movq or rep; movl is the smallest variant.  */
else if (!optimize_for_speed)
if (!count || (count & 3))
  return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
  return rep_prefix_usable ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; REP is expensive to
   set up.  */
else if (expected_size != -1 && expected_size < 4)
  return loop_1_byte;
else if (expected_size != -1)
enum stringop_alg alg = libcall;
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
  /* We get here if the algorithms that were not libcall-based
     were rep-prefix based and we are unable to use rep prefixes
     based on global register usage.  Break out of the loop and
     use the heuristic below.  */
  if (algs->size[i].max == 0)
  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
    enum stringop_alg candidate = algs->size[i].alg;
    if (candidate != libcall && ALG_USABLE_P (candidate))
      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
         last non-libcall inline algorithm.  */
      if (TARGET_INLINE_ALL_STRINGOPS)
        /* When the current size is best copied by a libcall, but we
           are still forced to inline, run the heuristic below that
           picks code for medium-sized blocks.  */
        if (alg != libcall)
      else if (ALG_USABLE_P (candidate))
gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
/* When asked to inline the call anyway, try to pick a meaningful
   choice.  We look for the maximal size of a block that is faster to
   copy by hand, and take blocks of at most that size, guessing that
   the average size will be roughly half of the maximum.

   If this turns out to be bad, we might simply specify the preferred
   choice in ix86_costs.  */
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
    && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
  enum stringop_alg alg;
  bool any_alg_usable_p = true;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    enum stringop_alg candidate = algs->size[i].alg;
    any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
    if (candidate != libcall && candidate
        && ALG_USABLE_P (candidate))
      max = algs->size[i].max;
  /* If there aren't any usable algorithms, then recursing on
     smaller sizes isn't going to find anything.  Just return the
     simple byte-at-a-time copy loop.  */
  if (!any_alg_usable_p)
    /* Pick something reasonable.  */
    if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      *dynamic_check = 128;
    return loop_1_byte;
  alg = decide_alg (count, max / 2, memset, dynamic_check);
  gcc_assert (*dynamic_check == -1);
  gcc_assert (alg != libcall);
  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
    *dynamic_check = max;
return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
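/* A hypothetical cost table makes the size walk above concrete: with
   size entries {{24, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}
   and expected_size == 100, the first entry is skipped (24 < 100), the
   second covers 100, and rep_prefix_4_byte is returned provided the
   rep-prefix registers are available.  */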
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
decide_alignment (int align,
                  enum stringop_alg alg,
int desired_align = 0;
gcc_unreachable ();
case unrolled_loop:
desired_align = GET_MODE_SIZE (Pmode);
case rep_prefix_8_byte:
case rep_prefix_4_byte:
/* PentiumPro has special logic triggering for 8-byte aligned blocks,
   copying whole cachelines at once.  */
if (TARGET_PENTIUMPRO)
case rep_prefix_1_byte:
/* PentiumPro has special logic triggering for 8-byte aligned blocks,
   copying whole cachelines at once.  */
if (TARGET_PENTIUMPRO)
if (desired_align < align)
  desired_align = align;
if (expected_size != -1 && expected_size < 4)
  desired_align = align;
return desired_align;
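/* Example: for alg == unrolled_loop with 1-byte ALIGN, the desired
   alignment becomes GET_MODE_SIZE (Pmode), i.e. 8 on x86-64 and 4 in
   32-bit mode; if profile feedback says the block is smaller than 4
   bytes, the function gives up and keeps the original ALIGN.  */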
/* Return the smallest power of 2 greater than VAL.  */
smallest_pow2_greater_than (int val)
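/* E.g. smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (15) == 16; the callers then mask the
   count with (result - 1), so an epilogue sized this way covers any
   residual up to the value passed in.  */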
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by the epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to
      be handled by the main body (or up to size guarded by the prologue
      guard).  */
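/* As a rough illustration only (not the exact emitted RTL), a
   rep_prefix_4_byte expansion with DESIRED_ALIGN == 4 produces code of
   the shape:

        cmp     $4, %ecx         # 1) prologue guard for small blocks
        jb      .Lepilogue
        ...                      # 2) byte copies until %edi is aligned
        mov     %ecx, %edx
        shr     $2, %ecx         # 3) main body
        rep movsl
        and     $3, %edx         # 4) epilogue copies the tail
        ...  */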
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
rtx jump_around_label = NULL;
HOST_WIDE_INT align = 1;
unsigned HOST_WIDE_INT count = 0;
HOST_WIDE_INT expected_size = -1;
int size_needed = 0, epilogue_size_needed;
int desired_align = 0, align_bytes = 0;
enum stringop_alg alg;
bool need_zero_guard = false;
if (CONST_INT_P (align_exp))
  align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
if (CONST_INT_P (expected_align_exp)
    && INTVAL (expected_align_exp) > align)
  align = INTVAL (expected_align_exp);
/* ALIGN is the minimum of destination and source alignment, but we care here
   just about destination alignment.  */
else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
  align = MEM_ALIGN (dst) / BITS_PER_UNIT;
if (CONST_INT_P (count_exp))
  count = expected_size = INTVAL (count_exp);
if (CONST_INT_P (expected_size_exp) && count == 0)
  expected_size = INTVAL (expected_size_exp);
/* Make sure we don't need to care about overflow later on.  */
if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
/* Step 0: Decide on preferred algorithm, desired alignment and
   size of chunks to be copied by main loop.  */
alg = decide_alg (count, expected_size, false, &dynamic_check);
desired_align = decide_alignment (align, alg, expected_size);
if (!TARGET_ALIGN_STRINGOPS)
  align = desired_align;
if (alg == libcall)
gcc_assert (alg != no_stringop);
count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
destreg = copy_addr_to_reg (XEXP (dst, 0));
srcreg = copy_addr_to_reg (XEXP (src, 0));
gcc_unreachable ();
need_zero_guard = true;
size_needed = GET_MODE_SIZE (word_mode);
case unrolled_loop:
need_zero_guard = true;
size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
case rep_prefix_8_byte:
case rep_prefix_4_byte:
case rep_prefix_1_byte:
need_zero_guard = true;
epilogue_size_needed = size_needed;
/* Step 1: Prologue guard.  */

/* Alignment code needs count to be in register.  */
if (CONST_INT_P (count_exp) && desired_align > align)
  if (INTVAL (count_exp) > desired_align
      && INTVAL (count_exp) > size_needed)
    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
    if (align_bytes <= 0)
      align_bytes = desired_align - align_bytes;
  if (align_bytes == 0)
    count_exp = force_reg (counter_mode (count_exp), count_exp);
gcc_assert (desired_align >= 1 && align >= 1);
/* Ensure that alignment prologue won't copy past end of block.  */
if (size_needed > 1 || (desired_align > 1 && desired_align > align))
  epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
  /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
     Make sure it is a power of 2.  */
  epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
    /* If the main algorithm works on QImode, no epilogue is needed.
       For small sizes just don't align anything.  */
    if (size_needed == 1)
      desired_align = align;
  label = gen_label_rtx ();
  emit_cmp_and_jump_insns (count_exp,
                           GEN_INT (epilogue_size_needed),
                           LTU, 0, counter_mode (count_exp), 1, label);
  if (expected_size == -1 || expected_size < epilogue_size_needed)
    predict_jump (REG_BR_PROB_BASE * 60 / 100);
    predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Emit code to decide at runtime whether a library call or inline code
   should be used.  */
if (dynamic_check != -1)
  if (CONST_INT_P (count_exp))
    if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
      emit_block_move_via_libcall (dst, src, count_exp, false);
      count_exp = const0_rtx;
  rtx hot_label = gen_label_rtx ();
  jump_around_label = gen_label_rtx ();
  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                           LEU, 0, GET_MODE (count_exp), 1, hot_label);
  predict_jump (REG_BR_PROB_BASE * 90 / 100);
  emit_block_move_via_libcall (dst, src, count_exp, false);
  emit_jump (jump_around_label);
  emit_label (hot_label);
/* Step 2: Alignment prologue.  */

if (desired_align > align)
  if (align_bytes == 0)
    /* Except for the first move in the epilogue, we no longer know
       the constant offset in aliasing info.  It doesn't seem worth the
       pain to maintain it for the first move, so throw away the info
       early.  */
    src = change_address (src, BLKmode, srcreg);
    dst = change_address (dst, BLKmode, destreg);
    expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
    /* If we know how many bytes need to be stored before dst is
       sufficiently aligned, maintain aliasing info accurately.  */
    dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
                                           desired_align, align_bytes);
    count_exp = plus_constant (counter_mode (count_exp),
                               count_exp, -align_bytes);
    count -= align_bytes;
if (need_zero_guard
    && (count < (unsigned HOST_WIDE_INT) size_needed
        || (align_bytes == 0
            && count < ((unsigned HOST_WIDE_INT) size_needed
                        + desired_align - align))))
  /* It is possible that we copied enough so the main loop will not
     execute.  */
  gcc_assert (size_needed > 1);
  if (label == NULL_RTX)
    label = gen_label_rtx ();
  emit_cmp_and_jump_insns (count_exp,
                           GEN_INT (size_needed),
                           LTU, 0, counter_mode (count_exp), 1, label);
  if (expected_size == -1
      || expected_size < (desired_align - align) / 2 + size_needed)
    predict_jump (REG_BR_PROB_BASE * 20 / 100);
    predict_jump (REG_BR_PROB_BASE * 60 / 100);
if (label && size_needed == 1)
  emit_label (label);
  LABEL_NUSES (label) = 1;
  epilogue_size_needed = 1;
else if (label == NULL_RTX)
  epilogue_size_needed = size_needed;
/* Step 3: Main loop.  */

gcc_unreachable ();
expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                               count_exp, QImode, 1, expected_size);
expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                               count_exp, word_mode, 1, expected_size);
case unrolled_loop:
/* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
   registers for 4 temporaries anyway.  */
expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                               count_exp, word_mode, TARGET_64BIT ? 4 : 2,
case rep_prefix_8_byte:
expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
case rep_prefix_4_byte:
expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
case rep_prefix_1_byte:
expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
/* Properly adjust the offsets of src and dest memory for aliasing.  */
if (CONST_INT_P (count_exp))
  src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                      (count / size_needed) * size_needed);
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                      (count / size_needed) * size_needed);
  src = change_address (src, BLKmode, srcreg);
  dst = change_address (dst, BLKmode, destreg);
/* Step 4: Epilogue to copy the remaining bytes.  */

/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
if (size_needed < epilogue_size_needed)
  expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                       GEN_INT (size_needed - 1), count_exp, 1,
  if (tmp != count_exp)
    emit_move_insn (count_exp, tmp);
emit_label (label);
LABEL_NUSES (label) = 1;
if (count_exp != const0_rtx && epilogue_size_needed > 1)
  expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                          epilogue_size_needed);
if (jump_around_label)
  emit_label (jump_around_label);
/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
promote_duplicated_reg (enum machine_mode mode, rtx val)
enum machine_mode valmode = GET_MODE (val);
int nops = mode == DImode ? 3 : 2;
gcc_assert (mode == SImode || mode == DImode);
if (val == const0_rtx)
  return copy_to_mode_reg (mode, const0_rtx);
if (CONST_INT_P (val))
  HOST_WIDE_INT v = INTVAL (val) & 255;
  if (mode == DImode)
    v |= (v << 16) << 16;
  return copy_to_mode_reg (mode, gen_int_mode (v, mode));
if (valmode == VOIDmode)
if (valmode != QImode)
  val = gen_lowpart (QImode, val);
if (mode == QImode)
if (!TARGET_PARTIAL_REG_STALL)
if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
    + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
    <= (ix86_cost->shift_const + ix86_cost->add) * nops
       + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
  rtx reg = convert_modes (mode, QImode, val, true);
  tmp = promote_duplicated_reg (mode, const1_rtx);
  return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
  rtx reg = convert_modes (mode, QImode, val, true);
  if (!TARGET_PARTIAL_REG_STALL)
    if (mode == SImode)
      emit_insn (gen_movsi_insv_1 (reg, reg));
      emit_insn (gen_movdi_insv_1 (reg, reg));
    tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                               NULL, 1, OPTAB_DIRECT);
    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                             NULL, 1, OPTAB_DIRECT);
  reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
  if (mode == SImode)
    tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                               NULL, 1, OPTAB_DIRECT);
    reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
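/* The shift-and-or unwinding above doubles the populated width at each
   step: 0x000000XY -> 0x0000XYXY -> 0xXYXYXYXY, with one more
   shift-by-32/or pair for DImode.  */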
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
    && (size_needed > 4 || (desired_align > align && desired_align > 4)))
  promoted_val = promote_duplicated_reg (DImode, val);
else if (size_needed > 2 || (desired_align > align && desired_align > 2))
  promoted_val = promote_duplicated_reg (SImode, val);
else if (size_needed > 1 || (desired_align > align && desired_align > 1))
  promoted_val = promote_duplicated_reg (HImode, val);
  promoted_val = val;
return promoted_val;
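/* For instance, on 64-bit with VAL == 0xab and size_needed == 8 this
   returns a DImode register holding 0xabababababababab; with
   size_needed == 4, an SImode register holding 0xabababab.  */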
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See the expand_movmem comment for an explanation of the
   individual steps performed.  */
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
rtx jump_around_label = NULL;
HOST_WIDE_INT align = 1;
unsigned HOST_WIDE_INT count = 0;
HOST_WIDE_INT expected_size = -1;
int size_needed = 0, epilogue_size_needed;
int desired_align = 0, align_bytes = 0;
enum stringop_alg alg;
rtx promoted_val = NULL;
bool force_loopy_epilogue = false;
bool need_zero_guard = false;
if (CONST_INT_P (align_exp))
  align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
if (CONST_INT_P (expected_align_exp)
    && INTVAL (expected_align_exp) > align)
  align = INTVAL (expected_align_exp);
if (CONST_INT_P (count_exp))
  count = expected_size = INTVAL (count_exp);
if (CONST_INT_P (expected_size_exp) && count == 0)
  expected_size = INTVAL (expected_size_exp);
/* Make sure we don't need to care about overflow later on.  */
if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
/* Step 0: Decide on preferred algorithm, desired alignment and
   size of chunks to be copied by main loop.  */
alg = decide_alg (count, expected_size, true, &dynamic_check);
desired_align = decide_alignment (align, alg, expected_size);
if (!TARGET_ALIGN_STRINGOPS)
  align = desired_align;
if (alg == libcall)
gcc_assert (alg != no_stringop);
count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
destreg = copy_addr_to_reg (XEXP (dst, 0));
gcc_unreachable ();
need_zero_guard = true;
size_needed = GET_MODE_SIZE (word_mode);
case unrolled_loop:
need_zero_guard = true;
size_needed = GET_MODE_SIZE (word_mode) * 4;
case rep_prefix_8_byte:
case rep_prefix_4_byte:
case rep_prefix_1_byte:
need_zero_guard = true;
epilogue_size_needed = size_needed;
/* Step 1: Prologue guard.  */

/* Alignment code needs count to be in register.  */
if (CONST_INT_P (count_exp) && desired_align > align)
  if (INTVAL (count_exp) > desired_align
      && INTVAL (count_exp) > size_needed)
    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
    if (align_bytes <= 0)
      align_bytes = desired_align - align_bytes;
  if (align_bytes == 0)
    enum machine_mode mode = SImode;
    if (TARGET_64BIT && (count & ~0xffffffff))
    count_exp = force_reg (mode, count_exp);
/* Do the cheap promotion to allow better CSE across the
   main loop and epilogue (i.e. one load of the big constant in
   front of all code).  */
if (CONST_INT_P (val_exp))
  promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                 desired_align, align);
/* Ensure that alignment prologue won't copy past end of block.  */
if (size_needed > 1 || (desired_align > 1 && desired_align > align))
  epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
  /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
     Make sure it is a power of 2.  */
  epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
  /* To improve performance of small blocks, we jump around the VAL
     promoting mode.  This means that if the promoted VAL is not constant,
     we might not use it in the epilogue and have to use the byte loop
     variant.  */
  if (epilogue_size_needed > 2 && !promoted_val)
    force_loopy_epilogue = true;
  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
    /* If the main algorithm works on QImode, no epilogue is needed.
       For small sizes just don't align anything.  */
    if (size_needed == 1)
      desired_align = align;
  label = gen_label_rtx ();
  emit_cmp_and_jump_insns (count_exp,
                           GEN_INT (epilogue_size_needed),
                           LTU, 0, counter_mode (count_exp), 1, label);
  if (expected_size == -1 || expected_size <= epilogue_size_needed)
    predict_jump (REG_BR_PROB_BASE * 60 / 100);
    predict_jump (REG_BR_PROB_BASE * 20 / 100);
if (dynamic_check != -1)
  rtx hot_label = gen_label_rtx ();
  jump_around_label = gen_label_rtx ();
  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                           LEU, 0, counter_mode (count_exp), 1, hot_label);
  predict_jump (REG_BR_PROB_BASE * 90 / 100);
  set_storage_via_libcall (dst, count_exp, val_exp, false);
  emit_jump (jump_around_label);
  emit_label (hot_label);
/* Step 2: Alignment prologue.  */

/* Do the expensive promotion once we have branched off the small blocks.  */
promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                               desired_align, align);
gcc_assert (desired_align >= 1 && align >= 1);
if (desired_align > align)
  if (align_bytes == 0)
    /* Except for the first move in the epilogue, we no longer know
       the constant offset in aliasing info.  It doesn't seem worth the
       pain to maintain it for the first move, so throw away the info
       early.  */
    dst = change_address (dst, BLKmode, destreg);
    expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
    /* If we know how many bytes need to be stored before dst is
       sufficiently aligned, maintain aliasing info accurately.  */
    dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
                                           desired_align, align_bytes);
    count_exp = plus_constant (counter_mode (count_exp),
                               count_exp, -align_bytes);
    count -= align_bytes;
if (need_zero_guard
    && (count < (unsigned HOST_WIDE_INT) size_needed
        || (align_bytes == 0
            && count < ((unsigned HOST_WIDE_INT) size_needed
                        + desired_align - align))))
  /* It is possible that we copied enough so the main loop will not
     execute.  */
  gcc_assert (size_needed > 1);
  if (label == NULL_RTX)
    label = gen_label_rtx ();
  emit_cmp_and_jump_insns (count_exp,
                           GEN_INT (size_needed),
                           LTU, 0, counter_mode (count_exp), 1, label);
  if (expected_size == -1
      || expected_size < (desired_align - align) / 2 + size_needed)
    predict_jump (REG_BR_PROB_BASE * 20 / 100);
    predict_jump (REG_BR_PROB_BASE * 60 / 100);
if (label && size_needed == 1)
  emit_label (label);
  LABEL_NUSES (label) = 1;
  promoted_val = val_exp;
  epilogue_size_needed = 1;
else if (label == NULL_RTX)
  epilogue_size_needed = size_needed;
/* Step 3: Main loop.  */

gcc_unreachable ();
expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                               count_exp, QImode, 1, expected_size);
expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                               count_exp, word_mode, 1, expected_size);
case unrolled_loop:
expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                               count_exp, word_mode, 4, expected_size);
case rep_prefix_8_byte:
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
case rep_prefix_4_byte:
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
case rep_prefix_1_byte:
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
/* Properly adjust the offset of the dest memory for aliasing.  */
if (CONST_INT_P (count_exp))
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                      (count / size_needed) * size_needed);
  dst = change_address (dst, BLKmode, destreg);
/* Step 4: Epilogue to copy the remaining bytes.  */

/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
if (size_needed < epilogue_size_needed)
  expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                       GEN_INT (size_needed - 1), count_exp, 1,
  if (tmp != count_exp)
    emit_move_insn (count_exp, tmp);
emit_label (label);
LABEL_NUSES (label) = 1;
if (count_exp != const0_rtx && epilogue_size_needed > 1)
  if (force_loopy_epilogue)
    expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                     epilogue_size_needed);
    expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                            epilogue_size_needed);
if (jump_around_label)
  emit_label (jump_around_label);
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
rtx align_2_label = NULL_RTX;
rtx align_3_label = NULL_RTX;
rtx align_4_label = gen_label_rtx ();
rtx end_0_label = gen_label_rtx ();
rtx tmpreg = gen_reg_rtx (SImode);
rtx scratch = gen_reg_rtx (SImode);
if (CONST_INT_P (align_rtx))
  align = INTVAL (align_rtx);

/* Loop to check 1..3 bytes for null to get an aligned pointer.  */

/* Is there a known alignment and is it less than 4?  */
rtx scratch1 = gen_reg_rtx (Pmode);
emit_move_insn (scratch1, out);
/* Is there a known alignment and is it not 2?  */
align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
/* Leave just the 3 lower bits.  */
align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                          NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                         Pmode, 1, align_4_label);
emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                         Pmode, 1, align_2_label);
emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                         Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check whether it is aligned to 4 bytes.  */
align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                          NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                         Pmode, 1, align_4_label);
mem = change_address (src, QImode, out);

/* Now compare the bytes.  */

/* Compare the first n unaligned bytes on a byte-per-byte basis.  */
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                         QImode, 1, end_0_label);
/* Increment the address.  */
emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Not needed with an alignment of 2 */
emit_label (align_2_label);
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
emit_insn (ix86_gen_add3 (out, out, const1_rtx));
emit_label (align_3_label);
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Generate loop to check 4 bytes at a time.  It is not a good idea to
   align this loop; it only enlarges programs and does not help.  */
emit_label (align_4_label);
mem = change_address (src, SImode, out);
emit_move_insn (scratch, mem);
emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.  */
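/* A worked example (illustrative): for the word 0x64006261, i.e. the
   bytes 'a', 'b', '\0', 'd' in memory, (x - 0x01010101) & ~x & 0x80808080
   yields 0x00800000, flagging the zero byte, while for 0x64636261
   ("abcd") it yields 0 and the loop continues.  */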
emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
emit_insn (gen_one_cmplsi2 (scratch, scratch));
emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
emit_insn (gen_andsi3 (tmpreg, tmpreg,
                       gen_int_mode (0x80808080, SImode)));
emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
rtx reg = gen_reg_rtx (SImode);
rtx reg2 = gen_reg_rtx (Pmode);
emit_move_insn (reg, tmpreg);
emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

/* If zero is not in the first two bytes, move two bytes forward.  */
emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                        gen_rtx_IF_THEN_ELSE (SImode, tmp,
/* Emit lea manually to avoid clobbering the flags.  */
emit_insn (gen_rtx_SET (SImode, reg2,
                        gen_rtx_PLUS (Pmode, out, const2_rtx)));
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, out,
                        gen_rtx_IF_THEN_ELSE (Pmode, tmp,
rtx end_2_label = gen_label_rtx ();
/* Is zero in the first two bytes?  */
emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
JUMP_LABEL (tmp) = end_2_label;

/* Not in the first two.  Move two bytes forward.  */
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
emit_insn (ix86_gen_add3 (out, out, const2_rtx));
emit_label (end_2_label);

/* Avoid a branch when fixing up the byte.  */
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
tmp = gen_rtx_REG (CCmode, FLAGS_REG);
cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
emit_label (end_0_label);
/* Expand strlen.  */

ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
rtx addr, scratch1, scratch2, scratch3, scratch4;

/* The generic case of the strlen expander is long.  Avoid expanding it
   unless TARGET_INLINE_ALL_STRINGOPS.  */
if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
    && !TARGET_INLINE_ALL_STRINGOPS
    && !optimize_insn_for_size_p ()
    && (!CONST_INT_P (align) || INTVAL (align) < 4))
addr = force_reg (Pmode, XEXP (src, 0));
scratch1 = gen_reg_rtx (Pmode);
if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
    && !optimize_insn_for_size_p ())
  /* Well it seems that some optimizer does not combine a call like
     foo(strlen(bar), strlen(bar));
     when the move and the subtraction are done here.  It does calculate
     the length just once when these instructions are done inside of
     output_strlen_unroll().  But since &bar[strlen(bar)] is often used
     and this uses one fewer register for the lifetime of
     output_strlen_unroll(), this is better.  */
  emit_move_insn (out, addr);
  ix86_expand_strlensi_unroll_1 (out, src, align);
  /* strlensi_unroll_1 returns the address of the zero at the end of
     the string, like memchr(), so compute the length by subtracting
     the start address.  */
  emit_insn (ix86_gen_sub3 (out, out, addr));
  /* Can't use this if the user has appropriated eax, ecx, or edi.  */
  if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
  scratch2 = gen_reg_rtx (Pmode);
  scratch3 = gen_reg_rtx (Pmode);
  scratch4 = force_reg (Pmode, constm1_rtx);
  emit_move_insn (scratch3, addr);
  eoschar = force_reg (QImode, eoschar);
  src = replace_equiv_address_nv (src, scratch3);
  /* If .md starts supporting :P, this can be done in .md.  */
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                           scratch4), UNSPEC_SCAS);
  emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
  emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
  emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
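/* To see why this computes the length (a sketch): SCRATCH4 preloads
   the count register with -1, and repnz scasb decrements it once per
   byte scanned including the terminator, so for "abc" it ends at -5;
   then ~(-5) == 4, and adding -1 gives 3, the string length.  */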
/* For a given symbol (function) construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
construct_plt_address (rtx symbol)
gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
gcc_assert (ix86_cmodel == CM_LARGE_PIC);
gcc_assert (Pmode == DImode);
tmp = gen_reg_rtx (Pmode);
unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
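/* A sketch of the sequence materialized above (register names are
   placeholders):

        movabs  $symbol@PLTOFF, %<tmp>
        add     %<pic>, %<tmp>

   where %<pic> is the PIC register holding the GOT base.  */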
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx pop, bool sibcall)
/* We need to represent that SI and DI registers are clobbered
   by SYSV calls.  */
static int clobbered_registers[] = {
  XMM6_REG, XMM7_REG, XMM8_REG,
  XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG,
  XMM15_REG, SI_REG, DI_REG
rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
rtx use = NULL, call;
unsigned int vec_len;
if (pop == const0_rtx)
gcc_assert (!TARGET_64BIT || !pop);
if (TARGET_MACHO && !TARGET_64BIT)
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
/* Static functions and indirect calls don't need the pic register.  */
if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
    && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
    && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
  use_reg (&use, pic_offset_table_rtx);
if (TARGET_64BIT && INTVAL (callarg2) >= 0)
  rtx al = gen_rtx_REG (QImode, AX_REG);
  emit_move_insn (al, callarg2);
  use_reg (&use, al);
if (ix86_cmodel == CM_LARGE_PIC
    && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
    && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
  fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
    ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
    : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
  fnaddr = XEXP (fnaddr, 0);
  if (GET_MODE (fnaddr) != word_mode)
    fnaddr = convert_to_mode (word_mode, fnaddr, 1);
  fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
call = gen_rtx_SET (VOIDmode, retval, call);
vec[vec_len++] = call;
pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
vec[vec_len++] = pop;
if (TARGET_64BIT_MS_ABI
    && (!callarg2 || INTVAL (callarg2) != -2))
  vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
                                   UNSPEC_MS_TO_SYSV_CALL);
  for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
    = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
                       gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
                                    clobbered_registers[i]));
/* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
if (TARGET_VZEROUPPER)
  if (cfun->machine->callee_pass_avx256_p)
    if (cfun->machine->callee_return_avx256_p)
      avx256 = callee_return_pass_avx256;
      avx256 = callee_pass_avx256;
  else if (cfun->machine->callee_return_avx256_p)
    avx256 = callee_return_avx256;
    avx256 = call_no_avx256;
  if (reload_completed)
    emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
    vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
                                     gen_rtvec (1, GEN_INT (avx256)),
                                     UNSPEC_CALL_NEEDS_VZEROUPPER);
call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
call = emit_call_insn (call);
CALL_INSN_FUNCTION_USAGE (call) = use;
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
rtx pat = PATTERN (insn);
rtvec vec = XVEC (pat, 0);
int len = GET_NUM_ELEM (vec) - 1;

/* Strip off the last entry of the parallel.  */
gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
pat = RTVEC_ELT (vec, 0);
pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
emit_insn (gen_avx_vzeroupper (vzeroupper));
emit_call_insn (pat);
/* Output the assembly for a call instruction.  */

ix86_output_call_insn (rtx insn, rtx call_op)
bool direct_p = constant_call_address_operand (call_op, VOIDmode);
bool seh_nop_p = false;
if (SIBLING_CALL_P (insn))
  /* SEH epilogue detection requires the indirect branch case
     to include REX.W.  */
  else if (TARGET_SEH)
    xasm = "rex.W jmp %A0";
  output_asm_insn (xasm, &call_op);
/* SEH unwinding can require an extra nop to be emitted in several
   circumstances.  Determine if we have one of those.  */
for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
  /* If we get to another real insn, we don't need the nop.  */
  /* If we get to the epilogue note, prevent a catch region from
     being adjacent to the standard epilogue sequence.  If non-call
     exceptions are enabled, we'll have done this during epilogue
     emission.  */
  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
      && !flag_non_call_exceptions
      && !can_throw_internal (insn))
/* If we didn't find a real insn following the call, prevent the
   unwinder from looking into the next function.  */
xasm = "call\t%P0";
xasm = "call\t%A0";
output_asm_insn (xasm, &call_op);
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
struct machine_function *f;
f = ggc_alloc_cleared_machine_function ();
f->use_fast_prologue_epilogue_nregs = -1;
f->tls_descriptor_call_expanded_p = 0;
f->call_abi = ix86_abi;

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
struct stack_local_entry *s;
gcc_assert (n < MAX_386_STACK_LOCALS);
/* Virtual slot is valid only before vregs are instantiated.  */
gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
for (s = ix86_stack_locals; s; s = s->next)
  if (s->mode == mode && s->n == n)
    return validize_mem (copy_rtx (s->rtl));
s = ggc_alloc_stack_local_entry ();
s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
s->next = ix86_stack_locals;
ix86_stack_locals = s;
return validize_mem (s->rtl);
/* Calculate the length of the memory address in the instruction encoding.
   Includes the addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate an addr32 prefix for LEA insns.  */

memory_address_length (rtx addr, bool lea)
struct ix86_address parts;
rtx base, index, disp;
if (GET_CODE (addr) == PRE_DEC
    || GET_CODE (addr) == POST_INC
    || GET_CODE (addr) == PRE_MODIFY
    || GET_CODE (addr) == POST_MODIFY)
ok = ix86_decompose_address (addr, &parts);
len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
/* If this is not an LEA instruction, add the length of the addr32 prefix.  */
if (TARGET_64BIT && !lea
    && (SImode_address_operand (addr, VOIDmode)
        || (parts.base && GET_MODE (parts.base) == SImode)
        || (parts.index && GET_MODE (parts.index) == SImode)))
index = parts.index;
if (base && GET_CODE (base) == SUBREG)
  base = SUBREG_REG (base);
if (index && GET_CODE (index) == SUBREG)
  index = SUBREG_REG (index);
gcc_assert (base == NULL_RTX || REG_P (base));
gcc_assert (index == NULL_RTX || REG_P (index));
/* Rule of thumb:
   - esp as the base always wants an index,
   - ebp as the base always wants a displacement,
   - r12 as the base always wants an index,
   - r13 as the base always wants a displacement.  */

/* Register Indirect.  */
if (base && !index && !disp)
  /* esp (for its index) and ebp (for its displacement) need
     the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
     mode.  */
  if (base == arg_pointer_rtx
      || base == frame_pointer_rtx
      || REGNO (base) == SP_REG
      || REGNO (base) == BP_REG
      || REGNO (base) == R12_REG
      || REGNO (base) == R13_REG)
/* Direct Addressing.  In 64-bit mode mod 00 r/m 5
   is not disp32, but disp32(%rip), so for disp32
   SIB byte is needed, unless print_operand_address
   optimizes it into disp32(%rip) or (%rip) is implied
   by UNSPEC.  */
else if (disp && !base && !index)
  if (GET_CODE (disp) == CONST)
    symbol = XEXP (disp, 0);
  if (GET_CODE (symbol) == PLUS
      && CONST_INT_P (XEXP (symbol, 1)))
    symbol = XEXP (symbol, 0);
  if (GET_CODE (symbol) != LABEL_REF
      && (GET_CODE (symbol) != SYMBOL_REF
          || SYMBOL_REF_TLS_MODEL (symbol) != 0)
      && (GET_CODE (symbol) != UNSPEC
          || (XINT (symbol, 1) != UNSPEC_GOTPCREL
              && XINT (symbol, 1) != UNSPEC_PCREL
              && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
/* Find the length of the displacement constant.  */
if (base && satisfies_constraint_K (disp))
/* ebp always wants a displacement.  Similarly r13.  */
else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
/* An index requires the two-byte modrm form....  */
/* ...like esp (or r12), which always wants an index.  */
    || base == arg_pointer_rtx
    || base == frame_pointer_rtx
    || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
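/* Examples of the extra bytes counted here (beyond modrm and opcode):
   (%eax) needs none; (%esp) needs a SIB byte; (%ebp) needs a one-byte
   displacement; 16(%eax) fits a disp8 for one byte, while a bare
   symbol address needs a four-byte displacement.  */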
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
ix86_attr_length_immediate_default (rtx insn, bool shortform)
extract_insn_cached (insn);
for (i = recog_data.n_operands - 1; i >= 0; --i)
  if (CONSTANT_P (recog_data.operand[i]))
    enum attr_mode mode = get_attr_mode (insn);
    if (shortform && CONST_INT_P (recog_data.operand[i]))
      HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
      ival = trunc_int_for_mode (ival, HImode);
      ival = trunc_int_for_mode (ival, SImode);
      if (IN_RANGE (ival, -128, 127))
    /* Immediates for DImode instructions are encoded
       as 32-bit sign-extended values.  */
    fatal_insn ("unknown insn mode", insn);
/* Compute default value for "length_address" attribute.  */
ix86_attr_length_address_default (rtx insn)
if (get_attr_type (insn) == TYPE_LEA)
  rtx set = PATTERN (insn), addr;
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  addr = SET_SRC (set);
  return memory_address_length (addr, true);
extract_insn_cached (insn);
for (i = recog_data.n_operands - 1; i >= 0; --i)
  if (MEM_P (recog_data.operand[i]))
    constrain_operands_cached (reload_completed);
    if (which_alternative != -1)
      const char *constraints = recog_data.constraints[i];
      int alt = which_alternative;
      while (*constraints == '=' || *constraints == '+')
      while (*constraints++ != ',')
      /* Skip ignored operands.  */
      if (*constraints == 'X')
    return memory_address_length (XEXP (recog_data.operand[i], 0), false);
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2 or 3 byte VEX prefix and 1 opcode byte.  */

ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
/* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
   needs the 3-byte VEX prefix.  */
if (!has_0f_opcode || has_vex_w)
/* We can always use the 2-byte VEX prefix in 32-bit mode.  */
extract_insn_cached (insn);
for (i = recog_data.n_operands - 1; i >= 0; --i)
  if (REG_P (recog_data.operand[i]))
    /* The REX.W bit needs the 3-byte VEX prefix.  */
    if (GET_MODE (recog_data.operand[i]) == DImode
        && GENERAL_REG_P (recog_data.operand[i]))
  /* The REX.X or REX.B bits need the 3-byte VEX prefix.  */
  if (MEM_P (recog_data.operand[i])
      && x86_extended_reg_mentioned_p (recog_data.operand[i]))
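/* For example, vaddps %xmm1, %xmm2, %xmm3 fits the 2-byte VEX prefix
   (3 bytes including the opcode byte), while a DImode general-register
   operand (REX.W) or an extended register used in a memory address
   (REX.X/REX.B) forces the 3-byte prefix (4 bytes including the
   opcode).  */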
/* Return the maximum number of instructions a cpu can issue.  */

ix86_issue_rate (void)
case PROCESSOR_PENTIUM:
case PROCESSOR_ATOM:
case PROCESSOR_BTVER2:
case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
case PROCESSOR_CORE2_32:
case PROCESSOR_CORE2_64:
case PROCESSOR_COREI7_32:
case PROCESSOR_COREI7_64:
case PROCESSOR_ATHLON:
case PROCESSOR_AMDFAM10:
case PROCESSOR_NOCONA:
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
case PROCESSOR_BDVER1:
case PROCESSOR_BDVER2:
case PROCESSOR_BTVER1:
24084 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24085 by DEP_INSN and nothing set by DEP_INSN. */
24088 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
24092 /* Simplify the test for uninteresting insns. */
24093 if (insn_type != TYPE_SETCC
24094 && insn_type != TYPE_ICMOV
24095 && insn_type != TYPE_FCMOV
24096 && insn_type != TYPE_IBR)
24099 if ((set = single_set (dep_insn)) != 0)
24101 set = SET_DEST (set);
24104 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
24105 && XVECLEN (PATTERN (dep_insn), 0) == 2
24106 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
24107 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
24109 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
24110 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
24115 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
24118 /* This test is true if the dependent insn reads the flags but
24119 not any other potentially set register. */
24120 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
24123 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
24129 /* Return true iff USE_INSN has a memory address with operands set by
      SET_INSN.  */
24133 ix86_agi_dependent (rtx set_insn, rtx use_insn)
24136 extract_insn_cached (use_insn);
24137 for (i = recog_data.n_operands - 1; i >= 0; --i)
24138 if (MEM_P (recog_data.operand[i]))
24140 rtx addr = XEXP (recog_data.operand[i], 0);
24141 return modified_in_p (addr, set_insn) != 0;
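/* Editorial sketch of an address generation interlock (AGI), the case
   detected above:

     mov %eax, %esi        ; SET_INSN writes %esi
     mov (%esi), %ecx      ; USE_INSN's address reads %esi

   On Pentium this address dependence adds a cycle of latency (see
   ix86_adjust_cost below).  */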
24147 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
24149 enum attr_type insn_type, dep_insn_type;
24150 enum attr_memory memory;
24152 int dep_insn_code_number;
24154 /* Anti and output dependencies have zero cost on all CPUs. */
24155 if (REG_NOTE_KIND (link) != 0)
24158 dep_insn_code_number = recog_memoized (dep_insn);
24160 /* If we can't recognize the insns, we can't really do anything. */
24161 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
24164 insn_type = get_attr_type (insn);
24165 dep_insn_type = get_attr_type (dep_insn);
24169 case PROCESSOR_PENTIUM:
24170 /* Address Generation Interlock adds a cycle of latency. */
24171 if (insn_type == TYPE_LEA)
24173 rtx addr = PATTERN (insn);
24175 if (GET_CODE (addr) == PARALLEL)
24176 addr = XVECEXP (addr, 0, 0);
24178 gcc_assert (GET_CODE (addr) == SET);
24180 addr = SET_SRC (addr);
24181 if (modified_in_p (addr, dep_insn))
24184 else if (ix86_agi_dependent (dep_insn, insn))
24187 /* ??? Compares pair with jump/setcc. */
24188 if (ix86_flags_dependent (insn, dep_insn, insn_type))
24191 /* Floating point stores require the value to be ready one cycle earlier.  */
24192 if (insn_type == TYPE_FMOV
24193 && get_attr_memory (insn) == MEMORY_STORE
24194 && !ix86_agi_dependent (dep_insn, insn))
24198 case PROCESSOR_PENTIUMPRO:
24199 memory = get_attr_memory (insn);
24201 /* INT->FP conversion is expensive. */
24202 if (get_attr_fp_int_src (dep_insn))
24205 /* There is one cycle extra latency between an FP op and a store. */
24206 if (insn_type == TYPE_FMOV
24207 && (set = single_set (dep_insn)) != NULL_RTX
24208 && (set2 = single_set (insn)) != NULL_RTX
24209 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
24210 && MEM_P (SET_DEST (set2)))
24213 /* Show the ability of the reorder buffer to hide the latency of a load
24214    by executing it in parallel with the previous instruction, when the
24215    previous instruction is not needed to compute the address.  */
24216 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24217 && !ix86_agi_dependent (dep_insn, insn))
24219 /* Claim moves to take one cycle, as the core can issue one load at
24220    a time and the next load can start a cycle later.  */
24221 if (dep_insn_type == TYPE_IMOV
24222 || dep_insn_type == TYPE_FMOV)
24230 memory = get_attr_memory (insn);
24232 /* The esp dependency is resolved before the instruction is really
      finished.  */
24235 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
24238 /* INT->FP conversion is expensive. */
24239 if (get_attr_fp_int_src (dep_insn))
24242 /* Show the ability of the reorder buffer to hide the latency of a load
24243    by executing it in parallel with the previous instruction, when the
24244    previous instruction is not needed to compute the address.  */
24245 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24246 && !ix86_agi_dependent (dep_insn, insn))
24248 /* Claim moves to take one cycle, as the core can issue one load at
24249    a time and the next load can start a cycle later.  */
24250 if (dep_insn_type == TYPE_IMOV
24251 || dep_insn_type == TYPE_FMOV)
24260 case PROCESSOR_ATHLON:
24262 case PROCESSOR_AMDFAM10:
24263 case PROCESSOR_BDVER1:
24264 case PROCESSOR_BDVER2:
24265 case PROCESSOR_BTVER1:
24266 case PROCESSOR_BTVER2:
24267 case PROCESSOR_ATOM:
24268 case PROCESSOR_GENERIC32:
24269 case PROCESSOR_GENERIC64:
24270 memory = get_attr_memory (insn);
24272 /* Show the ability of the reorder buffer to hide the latency of a load
24273    by executing it in parallel with the previous instruction, when the
24274    previous instruction is not needed to compute the address.  */
24275 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24276 && !ix86_agi_dependent (dep_insn, insn))
24278 enum attr_unit unit = get_attr_unit (insn);
24281 /* Because of the difference between the length of the integer and
24282    floating unit pipeline preparation stages, memory operands for
24283    floating point are cheaper.
24285    ??? For Athlon the difference is most probably 2.  */
24286 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
24289 loadcost = TARGET_ATHLON ? 2 : 0;
24291 if (cost >= loadcost)
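/* Editorial sketch, assuming the integer loadcost of 3 conventionally
   used here: part of the load latency is hidden by the pipeline, so a
   dependence cost of 4 from a load to an integer consumer is reduced
   to 1, and the cost is clamped to 0 when cost < loadcost.  */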
24304 /* How many alternative schedules to try.  This should be as wide as the
24305    scheduling freedom in the DFA, but no wider.  Making this value too
24306    large results in extra work for the scheduler.  */
24309 ia32_multipass_dfa_lookahead (void)
24313 case PROCESSOR_PENTIUM:
24316 case PROCESSOR_PENTIUMPRO:
24320 case PROCESSOR_CORE2_32:
24321 case PROCESSOR_CORE2_64:
24322 case PROCESSOR_COREI7_32:
24323 case PROCESSOR_COREI7_64:
24324 case PROCESSOR_ATOM:
24325 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24326 as many instructions can be executed on a cycle, i.e.,
24327 issue_rate. I wonder why tuning for many CPUs does not do this. */
24328 if (reload_completed)
24329 return ix86_issue_rate ();
24330 /* Don't use lookahead for pre-reload schedule to save compile time. */
24338 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
24339    execution.  The reordering is applied when
24340    (1) an IMUL instruction is at the top of the ready list, and
24341    (2) there is exactly one producer of an independent IMUL instruction
       in the ready list;
24343    (3) that producer is then put at the top of the ready list.
24344    Returns the issue rate.  */
24347 ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
24348 int clock_var ATTRIBUTE_UNUSED)
24350 static int issue_rate = -1;
24351 int n_ready = *pn_ready;
24352 rtx insn, insn1, insn2;
24354 sd_iterator_def sd_it;
24358 /* Set up issue rate. */
24359 issue_rate = ix86_issue_rate();
24361 /* Do reordering for Atom only.  */
24362 if (ix86_tune != PROCESSOR_ATOM)
24364 /* Do not perform ready list reordering for the pre-reload schedule pass.  */
24365 if (!reload_completed)
24367 /* Nothing to do if ready list contains only 1 instruction. */
24371 /* Check that IMUL instruction is on the top of ready list. */
24372 insn = ready[n_ready - 1];
24373 if (!NONDEBUG_INSN_P (insn))
24375 insn = PATTERN (insn);
24376 if (GET_CODE (insn) == PARALLEL)
24377 insn = XVECEXP (insn, 0, 0);
24378 if (GET_CODE (insn) != SET)
24380 if (!(GET_CODE (SET_SRC (insn)) == MULT
24381 && GET_MODE (SET_SRC (insn)) == SImode))
24384 /* Search for producer of independent IMUL instruction. */
24385 for (i = n_ready - 2; i >= 0; i--)
24388 if (!NONDEBUG_INSN_P (insn))
24390 /* Skip IMUL instruction. */
24391 insn2 = PATTERN (insn);
24392 if (GET_CODE (insn2) == PARALLEL)
24393 insn2 = XVECEXP (insn2, 0, 0);
24394 if (GET_CODE (insn2) == SET
24395 && GET_CODE (SET_SRC (insn2)) == MULT
24396 && GET_MODE (SET_SRC (insn2)) == SImode)
24399 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
24402 con = DEP_CON (dep);
24403 if (!NONDEBUG_INSN_P (con))
24405 insn1 = PATTERN (con);
24406 if (GET_CODE (insn1) == PARALLEL)
24407 insn1 = XVECEXP (insn1, 0, 0);
24409 if (GET_CODE (insn1) == SET
24410 && GET_CODE (SET_SRC (insn1)) == MULT
24411 && GET_MODE (SET_SRC (insn1)) == SImode)
24413 sd_iterator_def sd_it1;
24415 /* Check that there is no other dependee for the IMUL.  */
24417 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
24420 pro = DEP_PRO (dep1);
24421 if (!NONDEBUG_INSN_P (pro))
24434 return issue_rate; /* Didn't find IMUL producer. */
24436 if (sched_verbose > 1)
24437 fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
24438 INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));
24440 /* Put IMUL producer (ready[index]) at the top of ready list. */
24441 insn1 = ready[index];
24442 for (i = index; i < n_ready - 1; i++)
24443 ready[i] = ready[i + 1];
24444 ready[n_ready - 1] = insn1;
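/* Editorial sketch of the swap performed above (the top of the ready
   list is the last element):

     before:  { a, producer, b, imul }   <- imul would issue first
     after:   { a, b, imul, producer }   <- producer issues first

   so the IMUL's input is ready when the IMUL itself issues, keeping
   Atom's pipelined IMUL unit busy.  */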
24450 ix86_class_likely_spilled_p (reg_class_t);
24452 /* Return true if the lhs of INSN is a hard function-argument register, and
24453    set IS_SPILLED to true if it is a likely-spilled hard register.  */
24455 insn_is_function_arg (rtx insn, bool* is_spilled)
24459 if (!NONDEBUG_INSN_P (insn))
24461 /* Call instructions are not movable; ignore them.  */
24464 insn = PATTERN (insn);
24465 if (GET_CODE (insn) == PARALLEL)
24466 insn = XVECEXP (insn, 0, 0);
24467 if (GET_CODE (insn) != SET)
24469 dst = SET_DEST (insn);
24470 if (REG_P (dst) && HARD_REGISTER_P (dst)
24471 && ix86_function_arg_regno_p (REGNO (dst)))
24473 /* Is it a likely-spilled hard register?  */
24474 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
24475 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
24476 *is_spilled = true;
24482 /* Add output dependencies for a chain of adjacent function arguments, but
24483    only if there is a move to a likely-spilled hard register.  Return the
24484    first argument if at least one dependence was added, or NULL otherwise.  */
24486 add_parameter_dependencies (rtx call, rtx head)
24490 rtx first_arg = NULL;
24491 bool is_spilled = false;
24493 head = PREV_INSN (head);
24495 /* Find the argument-passing instruction nearest to the call.  */
24498 last = PREV_INSN (last);
24501 if (!NONDEBUG_INSN_P (last))
24503 if (insn_is_function_arg (last, &is_spilled))
24511 insn = PREV_INSN (last);
24512 if (!INSN_P (insn))
24516 if (!NONDEBUG_INSN_P (insn))
24521 if (insn_is_function_arg (insn, &is_spilled))
24523 /* Add an output dependence between two function arguments if the chain
24524    of output arguments contains likely-spilled hard registers.  */
24526 add_dependence (last, insn, REG_DEP_OUTPUT);
24527 first_arg = last = insn;
24537 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict
      its code motion.  */
24540 avoid_func_arg_motion (rtx first_arg, rtx insn)
24545 set = single_set (insn);
24548 tmp = SET_DEST (set);
24551 /* Add output dependency to the first function argument. */
24552 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
24555 /* Add anti dependency. */
24556 add_dependence (first_arg, insn, REG_DEP_ANTI);
24559 /* Avoid cross-block motion of a function argument by adding a dependency
24560    from the first non-jump instruction in BB.  */
24562 add_dependee_for_func_arg (rtx arg, basic_block bb)
24564 rtx insn = BB_END (bb);
24568 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
24570 rtx set = single_set (insn);
24573 avoid_func_arg_motion (arg, insn);
24577 if (insn == BB_HEAD (bb))
24579 insn = PREV_INSN (insn);
24583 /* Hook for the pre-reload schedule - avoid motion of function arguments
24584    passed in likely-spilled hard registers.  */
24586 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
24589 rtx first_arg = NULL;
24590 if (reload_completed)
24592 while (head != tail && DEBUG_INSN_P (head))
24593 head = NEXT_INSN (head);
24594 for (insn = tail; insn != head; insn = PREV_INSN (insn))
24595 if (INSN_P (insn) && CALL_P (insn))
24597 first_arg = add_parameter_dependencies (insn, head);
24600 /* Add a dependee for the first argument to predecessors, but only if
24601    the region contains more than one block.  */
24602 basic_block bb = BLOCK_FOR_INSN (insn);
24603 int rgn = CONTAINING_RGN (bb->index);
24604 int nr_blks = RGN_NR_BLOCKS (rgn);
24605 /* Skip trivial regions and region head blocks that can have
24606 predecessors outside of region. */
24607 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
24611 /* Assume that the region is an SCC, i.e. all immediate predecessors
24612    of a non-head block are in the same region.  */
24613 FOR_EACH_EDGE (e, ei, bb->preds)
24615 /* Avoid creating loop-carried dependencies by using the topological
24616    ordering in the region.  */
24617 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
24618 add_dependee_for_func_arg (first_arg, e->src);
24626 else if (first_arg)
24627 avoid_func_arg_motion (first_arg, insn);
24630 /* Hook for the pre-reload schedule - set the priority of moves from
24631    likely-spilled hard registers to the maximum, to schedule them as soon
24632    as possible.  These are moves from function argument registers at the
24633    top of the function entry and moves from function return value registers
      after a call.  */
24635 ix86_adjust_priority (rtx insn, int priority)
24639 if (reload_completed)
24642 if (!NONDEBUG_INSN_P (insn))
24645 set = single_set (insn);
24648 rtx tmp = SET_SRC (set);
24650 && HARD_REGISTER_P (tmp)
24651 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
24652 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
24653 return current_sched_info->sched_max_insns_priority;
24659 /* Model the decoder of Core 2/i7.
24660    The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
24661    track the instruction fetch block boundaries and make sure that long
24662    (9+ byte) instructions are assigned to D0.  */
24664 /* Maximum length of an insn that can be handled by
24665 a secondary decoder unit. '8' for Core 2/i7. */
24666 static int core2i7_secondary_decoder_max_insn_size;
24668 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24669 '16' for Core 2/i7. */
24670 static int core2i7_ifetch_block_size;
24672 /* Maximum number of instructions decoder can handle per cycle.
24673 '6' for Core 2/i7. */
24674 static int core2i7_ifetch_block_max_insns;
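/* Editorial sketch of the decoder arithmetic with the Core 2/i7 values
   set below (8/16/6): three 7-byte insns need 21 > 16 bytes, so only
   two of them fit in one ifetch block; and a 9-byte insn exceeds the
   secondary decoder limit, so it can only be issued as the first insn
   of a cycle (decoder D0).  */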
24676 typedef struct ix86_first_cycle_multipass_data_ *
24677 ix86_first_cycle_multipass_data_t;
24678 typedef const struct ix86_first_cycle_multipass_data_ *
24679 const_ix86_first_cycle_multipass_data_t;
24681 /* A variable to store target state across calls to max_issue within
      one cycle.  */
24683 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
24684 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
24686 /* Initialize DATA. */
24688 core2i7_first_cycle_multipass_init (void *_data)
24690 ix86_first_cycle_multipass_data_t data
24691 = (ix86_first_cycle_multipass_data_t) _data;
24693 data->ifetch_block_len = 0;
24694 data->ifetch_block_n_insns = 0;
24695 data->ready_try_change = NULL;
24696 data->ready_try_change_size = 0;
24699 /* Advancing the cycle; reset ifetch block counts. */
24701 core2i7_dfa_post_advance_cycle (void)
24703 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
24705 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
24707 data->ifetch_block_len = 0;
24708 data->ifetch_block_n_insns = 0;
24711 static int min_insn_size (rtx);
24713 /* Filter out insns from ready_try that the core will not be able to issue
24714 on current cycle due to decoder. */
24716 core2i7_first_cycle_multipass_filter_ready_try
24717 (const_ix86_first_cycle_multipass_data_t data,
24718 char *ready_try, int n_ready, bool first_cycle_insn_p)
24725 if (ready_try[n_ready])
24728 insn = get_ready_element (n_ready);
24729 insn_size = min_insn_size (insn);
24731 if (/* If this insn is too long for a secondary decoder ... */
24732 (!first_cycle_insn_p
24733 && insn_size > core2i7_secondary_decoder_max_insn_size)
24734 /* ... or it would not fit into the ifetch block ... */
24735 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
24736 /* ... or the decoder is full already ... */
24737 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
24738 /* ... mask the insn out. */
24740 ready_try[n_ready] = 1;
24742 if (data->ready_try_change)
24743 SET_BIT (data->ready_try_change, n_ready);
24748 /* Prepare for a new round of multipass lookahead scheduling. */
24750 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
24751 bool first_cycle_insn_p)
24753 ix86_first_cycle_multipass_data_t data
24754 = (ix86_first_cycle_multipass_data_t) _data;
24755 const_ix86_first_cycle_multipass_data_t prev_data
24756 = ix86_first_cycle_multipass_data;
24758 /* Restore the state from the end of the previous round. */
24759 data->ifetch_block_len = prev_data->ifetch_block_len;
24760 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
24762 /* Filter instructions that cannot be issued on current cycle due to
24763 decoder restrictions. */
24764 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24765 first_cycle_insn_p);
24768 /* INSN is being issued in current solution. Account for its impact on
24769 the decoder model. */
24771 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
24772 rtx insn, const void *_prev_data)
24774 ix86_first_cycle_multipass_data_t data
24775 = (ix86_first_cycle_multipass_data_t) _data;
24776 const_ix86_first_cycle_multipass_data_t prev_data
24777 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
24779 int insn_size = min_insn_size (insn);
24781 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
24782 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
24783 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
24784 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
24786 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24787 if (!data->ready_try_change)
24789 data->ready_try_change = sbitmap_alloc (n_ready);
24790 data->ready_try_change_size = n_ready;
24792 else if (data->ready_try_change_size < n_ready)
24794 data->ready_try_change = sbitmap_resize (data->ready_try_change,
24796 data->ready_try_change_size = n_ready;
24798 bitmap_clear (data->ready_try_change);
24800 /* Filter out insns from ready_try that the core will not be able to issue
24801 on current cycle due to decoder. */
24802 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24806 /* Revert the effect on ready_try. */
24808 core2i7_first_cycle_multipass_backtrack (const void *_data,
24810 int n_ready ATTRIBUTE_UNUSED)
24812 const_ix86_first_cycle_multipass_data_t data
24813 = (const_ix86_first_cycle_multipass_data_t) _data;
24814 unsigned int i = 0;
24815 sbitmap_iterator sbi;
24817 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
24818 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
24824 /* Save the result of multipass lookahead scheduling for the next round. */
24826 core2i7_first_cycle_multipass_end (const void *_data)
24828 const_ix86_first_cycle_multipass_data_t data
24829 = (const_ix86_first_cycle_multipass_data_t) _data;
24830 ix86_first_cycle_multipass_data_t next_data
24831 = ix86_first_cycle_multipass_data;
24835 next_data->ifetch_block_len = data->ifetch_block_len;
24836 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
24840 /* Deallocate target data. */
24842 core2i7_first_cycle_multipass_fini (void *_data)
24844 ix86_first_cycle_multipass_data_t data
24845 = (ix86_first_cycle_multipass_data_t) _data;
24847 if (data->ready_try_change)
24849 sbitmap_free (data->ready_try_change);
24850 data->ready_try_change = NULL;
24851 data->ready_try_change_size = 0;
24855 /* Prepare for scheduling pass. */
24857 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
24858 int verbose ATTRIBUTE_UNUSED,
24859 int max_uid ATTRIBUTE_UNUSED)
24861 /* Install scheduling hooks for current CPU. Some of these hooks are used
24862 in time-critical parts of the scheduler, so we only set them up when
24863 they are actually used. */
24866 case PROCESSOR_CORE2_32:
24867 case PROCESSOR_CORE2_64:
24868 case PROCESSOR_COREI7_32:
24869 case PROCESSOR_COREI7_64:
24870 /* Do not perform multipass scheduling for pre-reload schedule
24871 to save compile time. */
24872 if (reload_completed)
24874 targetm.sched.dfa_post_advance_cycle
24875 = core2i7_dfa_post_advance_cycle;
24876 targetm.sched.first_cycle_multipass_init
24877 = core2i7_first_cycle_multipass_init;
24878 targetm.sched.first_cycle_multipass_begin
24879 = core2i7_first_cycle_multipass_begin;
24880 targetm.sched.first_cycle_multipass_issue
24881 = core2i7_first_cycle_multipass_issue;
24882 targetm.sched.first_cycle_multipass_backtrack
24883 = core2i7_first_cycle_multipass_backtrack;
24884 targetm.sched.first_cycle_multipass_end
24885 = core2i7_first_cycle_multipass_end;
24886 targetm.sched.first_cycle_multipass_fini
24887 = core2i7_first_cycle_multipass_fini;
24889 /* Set decoder parameters. */
24890 core2i7_secondary_decoder_max_insn_size = 8;
24891 core2i7_ifetch_block_size = 16;
24892 core2i7_ifetch_block_max_insns = 6;
24895 /* ... Fall through ... */
24897 targetm.sched.dfa_post_advance_cycle = NULL;
24898 targetm.sched.first_cycle_multipass_init = NULL;
24899 targetm.sched.first_cycle_multipass_begin = NULL;
24900 targetm.sched.first_cycle_multipass_issue = NULL;
24901 targetm.sched.first_cycle_multipass_backtrack = NULL;
24902 targetm.sched.first_cycle_multipass_end = NULL;
24903 targetm.sched.first_cycle_multipass_fini = NULL;
24909 /* Compute the alignment given to a constant that is being placed in memory.
24910    EXP is the constant and ALIGN is the alignment that the object would
      ordinarily have.
24912    The value of this function is used instead of that alignment to align
      the object.  */
24916 ix86_constant_alignment (tree exp, int align)
24918 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
24919 || TREE_CODE (exp) == INTEGER_CST)
24921 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
24923 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
24926 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
24927 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
24928 return BITS_PER_WORD;
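/* Editorial examples of the checks above: a DFmode constant such as
   "static const double d = 1.0;" is raised to 64-bit alignment, and a
   string constant of length >= 31 is raised to BITS_PER_WORD unless
   optimizing for size.  */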
24933 /* Compute the alignment for a static variable.
24934 TYPE is the data type, and ALIGN is the alignment that
24935 the object would ordinarily have. The value of this function is used
24936 instead of that alignment to align the object. */
24939 ix86_data_alignment (tree type, int align)
24941 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
24943 if (AGGREGATE_TYPE_P (type)
24944 && TYPE_SIZE (type)
24945 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24946 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
24947 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
24948 && align < max_align)
24951 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
24952    to a 16-byte boundary.  */
24955 if (AGGREGATE_TYPE_P (type)
24956 && TYPE_SIZE (type)
24957 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24958 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
24959 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
24963 if (TREE_CODE (type) == ARRAY_TYPE)
24965 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
24967 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
24970 else if (TREE_CODE (type) == COMPLEX_TYPE)
24973 if (TYPE_MODE (type) == DCmode && align < 64)
24975 if ((TYPE_MODE (type) == XCmode
24976 || TYPE_MODE (type) == TCmode) && align < 128)
24979 else if ((TREE_CODE (type) == RECORD_TYPE
24980 || TREE_CODE (type) == UNION_TYPE
24981 || TREE_CODE (type) == QUAL_UNION_TYPE)
24982 && TYPE_FIELDS (type))
24984 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
24986 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
24989 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
24990 || TREE_CODE (type) == INTEGER_TYPE)
24992 if (TYPE_MODE (type) == DFmode && align < 64)
24994 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
25001 /* Compute the alignment for a local variable or a stack slot. EXP is
25002 the data type or decl itself, MODE is the widest mode available and
25003 ALIGN is the alignment that the object would ordinarily have. The
25004    value of this macro is used instead of that alignment to align the
      object.  */
25008 ix86_local_alignment (tree exp, enum machine_mode mode,
25009 unsigned int align)
25013 if (exp && DECL_P (exp))
25015 type = TREE_TYPE (exp);
25024 /* Don't do dynamic stack realignment for long long objects with
25025 -mpreferred-stack-boundary=2. */
25028 && ix86_preferred_stack_boundary < 64
25029 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
25030 && (!type || !TYPE_USER_ALIGN (type))
25031 && (!decl || !DECL_USER_ALIGN (decl)))
25034 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25035    register in MODE.  We will return the largest alignment of XF
      and DF.  */
25039 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
25040 align = GET_MODE_ALIGNMENT (DFmode);
25044 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
25045    to a 16-byte boundary.  The exact wording is:
25047 An array uses the same alignment as its elements, except that a local or
25048 global array variable of length at least 16 bytes or
25049 a C99 variable-length array variable always has alignment of at least 16 bytes.
25051    This was added to allow the use of aligned SSE instructions on arrays.
25052    The rule is meant for static storage (where the compiler cannot do the
25053    analysis by itself).  We follow it for automatic variables only when it
25054    is convenient.  We fully control everything in the function being
25055    compiled, and functions from other units cannot rely on the alignment.
25057    Exclude the va_list type.  It is the common case of a local array where
25058    we cannot benefit from the alignment.  */
25059 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
25062 if (AGGREGATE_TYPE_P (type)
25063 && (va_list_type_node == NULL_TREE
25064 || (TYPE_MAIN_VARIANT (type)
25065 != TYPE_MAIN_VARIANT (va_list_type_node)))
25066 && TYPE_SIZE (type)
25067 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
25068 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
25069 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
25072 if (TREE_CODE (type) == ARRAY_TYPE)
25074 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
25076 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
25079 else if (TREE_CODE (type) == COMPLEX_TYPE)
25081 if (TYPE_MODE (type) == DCmode && align < 64)
25083 if ((TYPE_MODE (type) == XCmode
25084 || TYPE_MODE (type) == TCmode) && align < 128)
25087 else if ((TREE_CODE (type) == RECORD_TYPE
25088 || TREE_CODE (type) == UNION_TYPE
25089 || TREE_CODE (type) == QUAL_UNION_TYPE)
25090 && TYPE_FIELDS (type))
25092 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
25094 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
25097 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
25098 || TREE_CODE (type) == INTEGER_TYPE)
25101 if (TYPE_MODE (type) == DFmode && align < 64)
25103 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
25109 /* Compute the minimum required alignment for dynamic stack realignment
25110 purposes for a local variable, parameter or a stack slot. EXP is
25111 the data type or decl itself, MODE is its mode and ALIGN is the
25112 alignment that the object would ordinarily have. */
25115 ix86_minimum_alignment (tree exp, enum machine_mode mode,
25116 unsigned int align)
25120 if (exp && DECL_P (exp))
25122 type = TREE_TYPE (exp);
25131 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
25134 /* Don't do dynamic stack realignment for long long objects with
25135 -mpreferred-stack-boundary=2. */
25136 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
25137 && (!type || !TYPE_USER_ALIGN (type))
25138 && (!decl || !DECL_USER_ALIGN (decl)))
25144 /* Find a location for the static chain incoming to a nested function.
25145 This is a register, unless all free registers are used by arguments. */
25148 ix86_static_chain (const_tree fndecl, bool incoming_p)
25152 if (!DECL_STATIC_CHAIN (fndecl))
25157 /* We always use R10 in 64-bit mode. */
25165 /* By default in 32-bit mode we use ECX to pass the static chain. */
25168 fntype = TREE_TYPE (fndecl);
25169 ccvt = ix86_get_callcvt (fntype);
25170 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
25172 /* Fastcall functions use ecx/edx for arguments, which leaves
25173 us with EAX for the static chain.
25174 Thiscall functions use ecx for arguments, which also
25175 leaves us with EAX for the static chain. */
25178 else if (ix86_function_regparm (fntype, fndecl) == 3)
25180 /* For regparm 3, we have no free call-clobbered registers in
25181 which to store the static chain. In order to implement this,
25182 we have the trampoline push the static chain to the stack.
25183 However, we can't push a value below the return address when
25184 we call the nested function directly, so we have to use an
25185 alternate entry point. For this we use ESI, and have the
25186 alternate entry point push ESI, so that things appear the
25187 same once we're executing the nested function. */
25190 if (fndecl == current_function_decl)
25191 ix86_static_chain_on_stack = true;
25192 return gen_frame_mem (SImode,
25193 plus_constant (Pmode,
25194 arg_pointer_rtx, -8));
25200 return gen_rtx_REG (Pmode, regno);
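/* Editorial illustration (hypothetical GNU C source):

     void outer (void)
     {
       int x = 0;
       void inner (void) { x++; }    // needs a static chain to reach x
       ...
     }

   With -m32, INNER receives the chain in %ecx by default, in %eax for
   fastcall/thiscall, and on the stack (via the %esi alternate entry)
   for regparm(3); in 64-bit mode it is always %r10, as selected above.  */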
25203 /* Emit RTL insns to initialize the variable parts of a trampoline.
25204 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25205 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25206 to be passed to the target function. */
25209 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
25215 fnaddr = XEXP (DECL_RTL (fndecl), 0);
25221 /* Load the function address to r11.  Try to load the address using
25222    the shorter movl instead of movabs.  We may want to support
25223    movq for kernel mode, but the kernel does not use trampolines at
25224    the moment.  FNADDR is a 32-bit address and may not be in
25225    DImode when ptr_mode == SImode.  Always use movl in this
      case.  */
25227 if (ptr_mode == SImode
25228 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
25230 fnaddr = copy_addr_to_reg (fnaddr);
25232 mem = adjust_address (m_tramp, HImode, offset);
25233 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
25235 mem = adjust_address (m_tramp, SImode, offset + 2);
25236 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
25241 mem = adjust_address (m_tramp, HImode, offset);
25242 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
25244 mem = adjust_address (m_tramp, DImode, offset + 2);
25245 emit_move_insn (mem, fnaddr);
25249 /* Load static chain using movabs to r10. Use the shorter movl
25250 instead of movabs when ptr_mode == SImode. */
25251 if (ptr_mode == SImode)
25262 mem = adjust_address (m_tramp, HImode, offset);
25263 emit_move_insn (mem, gen_int_mode (opcode, HImode));
25265 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
25266 emit_move_insn (mem, chain_value);
25269 /* Jump to r11; the last (unused) byte is a nop, only there to
25270 pad the write out to a single 32-bit store. */
25271 mem = adjust_address (m_tramp, SImode, offset);
25272 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
25279 /* Depending on the static chain location, either load a register
25280 with a constant, or push the constant to the stack. All of the
25281 instructions are the same size. */
25282 chain = ix86_static_chain (fndecl, true);
25285 switch (REGNO (chain))
25288 opcode = 0xb8; break;
25290 opcode = 0xb9; break;
25292 gcc_unreachable ();
25298 mem = adjust_address (m_tramp, QImode, offset);
25299 emit_move_insn (mem, gen_int_mode (opcode, QImode));
25301 mem = adjust_address (m_tramp, SImode, offset + 1);
25302 emit_move_insn (mem, chain_value);
25305 mem = adjust_address (m_tramp, QImode, offset);
25306 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
25308 mem = adjust_address (m_tramp, SImode, offset + 1);
25310 /* Compute offset from the end of the jmp to the target function.
25311 In the case in which the trampoline stores the static chain on
25312 the stack, we need to skip the first insn which pushes the
25313 (call-saved) register static chain; this push is 1 byte. */
25315 disp = expand_binop (SImode, sub_optab, fnaddr,
25316 plus_constant (Pmode, XEXP (m_tramp, 0),
25317 offset - (MEM_P (chain) ? 1 : 0)),
25318 NULL_RTX, 1, OPTAB_DIRECT);
25319 emit_move_insn (mem, disp);
25322 gcc_assert (offset <= TRAMPOLINE_SIZE);
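/* Editorial sketch of the 64-bit trampoline bytes emitted above
   (little-endian, movabs variant):

     49 bb <imm64 fnaddr>    movabs $fnaddr, %r11
     49 ba <imm64 chain>     movabs $chain,  %r10
     49 ff e3                jmpq   *%r11
     90                      nop (pads the final 32-bit store)

   The 32-bit register variant is "mov $chain, %ecx" (b9 <imm32>)
   followed by "jmp <rel32>" (e9 <imm32>), with the displacement
   computed above.  */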
25324 #ifdef HAVE_ENABLE_EXECUTE_STACK
25325 #ifdef CHECK_EXECUTE_STACK_ENABLED
25326 if (CHECK_EXECUTE_STACK_ENABLED)
25328 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
25329 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
25333 /* The following file contains several enumerations and data structures
25334 built from the definitions in i386-builtin-types.def. */
25336 #include "i386-builtin-types.inc"
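/* Editorial note (syntax assumed, see i386-builtin-types.def itself):
   entries in the .def file are roughly of the form

     DEF_PRIMITIVE_TYPE (FLOAT, float_type_node)
     DEF_VECTOR_TYPE (V4SF, FLOAT)
     DEF_POINTER_TYPE (PCFLOAT, FLOAT, CONST)
     DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF)

   from which the generated .inc file derives the IX86_BT_* enumerations
   and the lookup tables used by the two getters below.  */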
25338 /* Table for the ix86 builtin non-function types. */
25339 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
25341 /* Retrieve an element from the above table, building some of
25342 the types lazily. */
25345 ix86_get_builtin_type (enum ix86_builtin_type tcode)
25347 unsigned int index;
25350 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
25352 type = ix86_builtin_type_tab[(int) tcode];
25356 gcc_assert (tcode > IX86_BT_LAST_PRIM);
25357 if (tcode <= IX86_BT_LAST_VECT)
25359 enum machine_mode mode;
25361 index = tcode - IX86_BT_LAST_PRIM - 1;
25362 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
25363 mode = ix86_builtin_type_vect_mode[index];
25365 type = build_vector_type_for_mode (itype, mode);
25371 index = tcode - IX86_BT_LAST_VECT - 1;
25372 if (tcode <= IX86_BT_LAST_PTR)
25373 quals = TYPE_UNQUALIFIED;
25375 quals = TYPE_QUAL_CONST;
25377 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
25378 if (quals != TYPE_UNQUALIFIED)
25379 itype = build_qualified_type (itype, quals);
25381 type = build_pointer_type (itype);
25384 ix86_builtin_type_tab[(int) tcode] = type;
25388 /* Table for the ix86 builtin function types. */
25389 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
25391 /* Retrieve an element from the above table, building some of
25392 the types lazily. */
25395 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
25399 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
25401 type = ix86_builtin_func_type_tab[(int) tcode];
25405 if (tcode <= IX86_BT_LAST_FUNC)
25407 unsigned start = ix86_builtin_func_start[(int) tcode];
25408 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
25409 tree rtype, atype, args = void_list_node;
25412 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
25413 for (i = after - 1; i > start; --i)
25415 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
25416 args = tree_cons (NULL, atype, args);
25419 type = build_function_type (rtype, args);
25423 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
25424 enum ix86_builtin_func_type icode;
25426 icode = ix86_builtin_func_alias_base[index];
25427 type = ix86_get_builtin_func_type (icode);
25430 ix86_builtin_func_type_tab[(int) tcode] = type;
25435 /* Codes for all the SSE/MMX builtins. */
25438 IX86_BUILTIN_ADDPS,
25439 IX86_BUILTIN_ADDSS,
25440 IX86_BUILTIN_DIVPS,
25441 IX86_BUILTIN_DIVSS,
25442 IX86_BUILTIN_MULPS,
25443 IX86_BUILTIN_MULSS,
25444 IX86_BUILTIN_SUBPS,
25445 IX86_BUILTIN_SUBSS,
25447 IX86_BUILTIN_CMPEQPS,
25448 IX86_BUILTIN_CMPLTPS,
25449 IX86_BUILTIN_CMPLEPS,
25450 IX86_BUILTIN_CMPGTPS,
25451 IX86_BUILTIN_CMPGEPS,
25452 IX86_BUILTIN_CMPNEQPS,
25453 IX86_BUILTIN_CMPNLTPS,
25454 IX86_BUILTIN_CMPNLEPS,
25455 IX86_BUILTIN_CMPNGTPS,
25456 IX86_BUILTIN_CMPNGEPS,
25457 IX86_BUILTIN_CMPORDPS,
25458 IX86_BUILTIN_CMPUNORDPS,
25459 IX86_BUILTIN_CMPEQSS,
25460 IX86_BUILTIN_CMPLTSS,
25461 IX86_BUILTIN_CMPLESS,
25462 IX86_BUILTIN_CMPNEQSS,
25463 IX86_BUILTIN_CMPNLTSS,
25464 IX86_BUILTIN_CMPNLESS,
25465 IX86_BUILTIN_CMPNGTSS,
25466 IX86_BUILTIN_CMPNGESS,
25467 IX86_BUILTIN_CMPORDSS,
25468 IX86_BUILTIN_CMPUNORDSS,
25470 IX86_BUILTIN_COMIEQSS,
25471 IX86_BUILTIN_COMILTSS,
25472 IX86_BUILTIN_COMILESS,
25473 IX86_BUILTIN_COMIGTSS,
25474 IX86_BUILTIN_COMIGESS,
25475 IX86_BUILTIN_COMINEQSS,
25476 IX86_BUILTIN_UCOMIEQSS,
25477 IX86_BUILTIN_UCOMILTSS,
25478 IX86_BUILTIN_UCOMILESS,
25479 IX86_BUILTIN_UCOMIGTSS,
25480 IX86_BUILTIN_UCOMIGESS,
25481 IX86_BUILTIN_UCOMINEQSS,
25483 IX86_BUILTIN_CVTPI2PS,
25484 IX86_BUILTIN_CVTPS2PI,
25485 IX86_BUILTIN_CVTSI2SS,
25486 IX86_BUILTIN_CVTSI642SS,
25487 IX86_BUILTIN_CVTSS2SI,
25488 IX86_BUILTIN_CVTSS2SI64,
25489 IX86_BUILTIN_CVTTPS2PI,
25490 IX86_BUILTIN_CVTTSS2SI,
25491 IX86_BUILTIN_CVTTSS2SI64,
25493 IX86_BUILTIN_MAXPS,
25494 IX86_BUILTIN_MAXSS,
25495 IX86_BUILTIN_MINPS,
25496 IX86_BUILTIN_MINSS,
25498 IX86_BUILTIN_LOADUPS,
25499 IX86_BUILTIN_STOREUPS,
25500 IX86_BUILTIN_MOVSS,
25502 IX86_BUILTIN_MOVHLPS,
25503 IX86_BUILTIN_MOVLHPS,
25504 IX86_BUILTIN_LOADHPS,
25505 IX86_BUILTIN_LOADLPS,
25506 IX86_BUILTIN_STOREHPS,
25507 IX86_BUILTIN_STORELPS,
25509 IX86_BUILTIN_MASKMOVQ,
25510 IX86_BUILTIN_MOVMSKPS,
25511 IX86_BUILTIN_PMOVMSKB,
25513 IX86_BUILTIN_MOVNTPS,
25514 IX86_BUILTIN_MOVNTQ,
25516 IX86_BUILTIN_LOADDQU,
25517 IX86_BUILTIN_STOREDQU,
25519 IX86_BUILTIN_PACKSSWB,
25520 IX86_BUILTIN_PACKSSDW,
25521 IX86_BUILTIN_PACKUSWB,
25523 IX86_BUILTIN_PADDB,
25524 IX86_BUILTIN_PADDW,
25525 IX86_BUILTIN_PADDD,
25526 IX86_BUILTIN_PADDQ,
25527 IX86_BUILTIN_PADDSB,
25528 IX86_BUILTIN_PADDSW,
25529 IX86_BUILTIN_PADDUSB,
25530 IX86_BUILTIN_PADDUSW,
25531 IX86_BUILTIN_PSUBB,
25532 IX86_BUILTIN_PSUBW,
25533 IX86_BUILTIN_PSUBD,
25534 IX86_BUILTIN_PSUBQ,
25535 IX86_BUILTIN_PSUBSB,
25536 IX86_BUILTIN_PSUBSW,
25537 IX86_BUILTIN_PSUBUSB,
25538 IX86_BUILTIN_PSUBUSW,
25541 IX86_BUILTIN_PANDN,
25545 IX86_BUILTIN_PAVGB,
25546 IX86_BUILTIN_PAVGW,
25548 IX86_BUILTIN_PCMPEQB,
25549 IX86_BUILTIN_PCMPEQW,
25550 IX86_BUILTIN_PCMPEQD,
25551 IX86_BUILTIN_PCMPGTB,
25552 IX86_BUILTIN_PCMPGTW,
25553 IX86_BUILTIN_PCMPGTD,
25555 IX86_BUILTIN_PMADDWD,
25557 IX86_BUILTIN_PMAXSW,
25558 IX86_BUILTIN_PMAXUB,
25559 IX86_BUILTIN_PMINSW,
25560 IX86_BUILTIN_PMINUB,
25562 IX86_BUILTIN_PMULHUW,
25563 IX86_BUILTIN_PMULHW,
25564 IX86_BUILTIN_PMULLW,
25566 IX86_BUILTIN_PSADBW,
25567 IX86_BUILTIN_PSHUFW,
25569 IX86_BUILTIN_PSLLW,
25570 IX86_BUILTIN_PSLLD,
25571 IX86_BUILTIN_PSLLQ,
25572 IX86_BUILTIN_PSRAW,
25573 IX86_BUILTIN_PSRAD,
25574 IX86_BUILTIN_PSRLW,
25575 IX86_BUILTIN_PSRLD,
25576 IX86_BUILTIN_PSRLQ,
25577 IX86_BUILTIN_PSLLWI,
25578 IX86_BUILTIN_PSLLDI,
25579 IX86_BUILTIN_PSLLQI,
25580 IX86_BUILTIN_PSRAWI,
25581 IX86_BUILTIN_PSRADI,
25582 IX86_BUILTIN_PSRLWI,
25583 IX86_BUILTIN_PSRLDI,
25584 IX86_BUILTIN_PSRLQI,
25586 IX86_BUILTIN_PUNPCKHBW,
25587 IX86_BUILTIN_PUNPCKHWD,
25588 IX86_BUILTIN_PUNPCKHDQ,
25589 IX86_BUILTIN_PUNPCKLBW,
25590 IX86_BUILTIN_PUNPCKLWD,
25591 IX86_BUILTIN_PUNPCKLDQ,
25593 IX86_BUILTIN_SHUFPS,
25595 IX86_BUILTIN_RCPPS,
25596 IX86_BUILTIN_RCPSS,
25597 IX86_BUILTIN_RSQRTPS,
25598 IX86_BUILTIN_RSQRTPS_NR,
25599 IX86_BUILTIN_RSQRTSS,
25600 IX86_BUILTIN_RSQRTF,
25601 IX86_BUILTIN_SQRTPS,
25602 IX86_BUILTIN_SQRTPS_NR,
25603 IX86_BUILTIN_SQRTSS,
25605 IX86_BUILTIN_UNPCKHPS,
25606 IX86_BUILTIN_UNPCKLPS,
25608 IX86_BUILTIN_ANDPS,
25609 IX86_BUILTIN_ANDNPS,
25611 IX86_BUILTIN_XORPS,
25614 IX86_BUILTIN_LDMXCSR,
25615 IX86_BUILTIN_STMXCSR,
25616 IX86_BUILTIN_SFENCE,
25618 IX86_BUILTIN_FXSAVE,
25619 IX86_BUILTIN_FXRSTOR,
25620 IX86_BUILTIN_FXSAVE64,
25621 IX86_BUILTIN_FXRSTOR64,
25623 IX86_BUILTIN_XSAVE,
25624 IX86_BUILTIN_XRSTOR,
25625 IX86_BUILTIN_XSAVE64,
25626 IX86_BUILTIN_XRSTOR64,
25628 IX86_BUILTIN_XSAVEOPT,
25629 IX86_BUILTIN_XSAVEOPT64,
25631 /* 3DNow! Original */
25632 IX86_BUILTIN_FEMMS,
25633 IX86_BUILTIN_PAVGUSB,
25634 IX86_BUILTIN_PF2ID,
25635 IX86_BUILTIN_PFACC,
25636 IX86_BUILTIN_PFADD,
25637 IX86_BUILTIN_PFCMPEQ,
25638 IX86_BUILTIN_PFCMPGE,
25639 IX86_BUILTIN_PFCMPGT,
25640 IX86_BUILTIN_PFMAX,
25641 IX86_BUILTIN_PFMIN,
25642 IX86_BUILTIN_PFMUL,
25643 IX86_BUILTIN_PFRCP,
25644 IX86_BUILTIN_PFRCPIT1,
25645 IX86_BUILTIN_PFRCPIT2,
25646 IX86_BUILTIN_PFRSQIT1,
25647 IX86_BUILTIN_PFRSQRT,
25648 IX86_BUILTIN_PFSUB,
25649 IX86_BUILTIN_PFSUBR,
25650 IX86_BUILTIN_PI2FD,
25651 IX86_BUILTIN_PMULHRW,
25653 /* 3DNow! Athlon Extensions */
25654 IX86_BUILTIN_PF2IW,
25655 IX86_BUILTIN_PFNACC,
25656 IX86_BUILTIN_PFPNACC,
25657 IX86_BUILTIN_PI2FW,
25658 IX86_BUILTIN_PSWAPDSI,
25659 IX86_BUILTIN_PSWAPDSF,
25662 IX86_BUILTIN_ADDPD,
25663 IX86_BUILTIN_ADDSD,
25664 IX86_BUILTIN_DIVPD,
25665 IX86_BUILTIN_DIVSD,
25666 IX86_BUILTIN_MULPD,
25667 IX86_BUILTIN_MULSD,
25668 IX86_BUILTIN_SUBPD,
25669 IX86_BUILTIN_SUBSD,
25671 IX86_BUILTIN_CMPEQPD,
25672 IX86_BUILTIN_CMPLTPD,
25673 IX86_BUILTIN_CMPLEPD,
25674 IX86_BUILTIN_CMPGTPD,
25675 IX86_BUILTIN_CMPGEPD,
25676 IX86_BUILTIN_CMPNEQPD,
25677 IX86_BUILTIN_CMPNLTPD,
25678 IX86_BUILTIN_CMPNLEPD,
25679 IX86_BUILTIN_CMPNGTPD,
25680 IX86_BUILTIN_CMPNGEPD,
25681 IX86_BUILTIN_CMPORDPD,
25682 IX86_BUILTIN_CMPUNORDPD,
25683 IX86_BUILTIN_CMPEQSD,
25684 IX86_BUILTIN_CMPLTSD,
25685 IX86_BUILTIN_CMPLESD,
25686 IX86_BUILTIN_CMPNEQSD,
25687 IX86_BUILTIN_CMPNLTSD,
25688 IX86_BUILTIN_CMPNLESD,
25689 IX86_BUILTIN_CMPORDSD,
25690 IX86_BUILTIN_CMPUNORDSD,
25692 IX86_BUILTIN_COMIEQSD,
25693 IX86_BUILTIN_COMILTSD,
25694 IX86_BUILTIN_COMILESD,
25695 IX86_BUILTIN_COMIGTSD,
25696 IX86_BUILTIN_COMIGESD,
25697 IX86_BUILTIN_COMINEQSD,
25698 IX86_BUILTIN_UCOMIEQSD,
25699 IX86_BUILTIN_UCOMILTSD,
25700 IX86_BUILTIN_UCOMILESD,
25701 IX86_BUILTIN_UCOMIGTSD,
25702 IX86_BUILTIN_UCOMIGESD,
25703 IX86_BUILTIN_UCOMINEQSD,
25705 IX86_BUILTIN_MAXPD,
25706 IX86_BUILTIN_MAXSD,
25707 IX86_BUILTIN_MINPD,
25708 IX86_BUILTIN_MINSD,
25710 IX86_BUILTIN_ANDPD,
25711 IX86_BUILTIN_ANDNPD,
25713 IX86_BUILTIN_XORPD,
25715 IX86_BUILTIN_SQRTPD,
25716 IX86_BUILTIN_SQRTSD,
25718 IX86_BUILTIN_UNPCKHPD,
25719 IX86_BUILTIN_UNPCKLPD,
25721 IX86_BUILTIN_SHUFPD,
25723 IX86_BUILTIN_LOADUPD,
25724 IX86_BUILTIN_STOREUPD,
25725 IX86_BUILTIN_MOVSD,
25727 IX86_BUILTIN_LOADHPD,
25728 IX86_BUILTIN_LOADLPD,
25730 IX86_BUILTIN_CVTDQ2PD,
25731 IX86_BUILTIN_CVTDQ2PS,
25733 IX86_BUILTIN_CVTPD2DQ,
25734 IX86_BUILTIN_CVTPD2PI,
25735 IX86_BUILTIN_CVTPD2PS,
25736 IX86_BUILTIN_CVTTPD2DQ,
25737 IX86_BUILTIN_CVTTPD2PI,
25739 IX86_BUILTIN_CVTPI2PD,
25740 IX86_BUILTIN_CVTSI2SD,
25741 IX86_BUILTIN_CVTSI642SD,
25743 IX86_BUILTIN_CVTSD2SI,
25744 IX86_BUILTIN_CVTSD2SI64,
25745 IX86_BUILTIN_CVTSD2SS,
25746 IX86_BUILTIN_CVTSS2SD,
25747 IX86_BUILTIN_CVTTSD2SI,
25748 IX86_BUILTIN_CVTTSD2SI64,
25750 IX86_BUILTIN_CVTPS2DQ,
25751 IX86_BUILTIN_CVTPS2PD,
25752 IX86_BUILTIN_CVTTPS2DQ,
25754 IX86_BUILTIN_MOVNTI,
25755 IX86_BUILTIN_MOVNTI64,
25756 IX86_BUILTIN_MOVNTPD,
25757 IX86_BUILTIN_MOVNTDQ,
25759 IX86_BUILTIN_MOVQ128,
25762 IX86_BUILTIN_MASKMOVDQU,
25763 IX86_BUILTIN_MOVMSKPD,
25764 IX86_BUILTIN_PMOVMSKB128,
25766 IX86_BUILTIN_PACKSSWB128,
25767 IX86_BUILTIN_PACKSSDW128,
25768 IX86_BUILTIN_PACKUSWB128,
25770 IX86_BUILTIN_PADDB128,
25771 IX86_BUILTIN_PADDW128,
25772 IX86_BUILTIN_PADDD128,
25773 IX86_BUILTIN_PADDQ128,
25774 IX86_BUILTIN_PADDSB128,
25775 IX86_BUILTIN_PADDSW128,
25776 IX86_BUILTIN_PADDUSB128,
25777 IX86_BUILTIN_PADDUSW128,
25778 IX86_BUILTIN_PSUBB128,
25779 IX86_BUILTIN_PSUBW128,
25780 IX86_BUILTIN_PSUBD128,
25781 IX86_BUILTIN_PSUBQ128,
25782 IX86_BUILTIN_PSUBSB128,
25783 IX86_BUILTIN_PSUBSW128,
25784 IX86_BUILTIN_PSUBUSB128,
25785 IX86_BUILTIN_PSUBUSW128,
25787 IX86_BUILTIN_PAND128,
25788 IX86_BUILTIN_PANDN128,
25789 IX86_BUILTIN_POR128,
25790 IX86_BUILTIN_PXOR128,
25792 IX86_BUILTIN_PAVGB128,
25793 IX86_BUILTIN_PAVGW128,
25795 IX86_BUILTIN_PCMPEQB128,
25796 IX86_BUILTIN_PCMPEQW128,
25797 IX86_BUILTIN_PCMPEQD128,
25798 IX86_BUILTIN_PCMPGTB128,
25799 IX86_BUILTIN_PCMPGTW128,
25800 IX86_BUILTIN_PCMPGTD128,
25802 IX86_BUILTIN_PMADDWD128,
25804 IX86_BUILTIN_PMAXSW128,
25805 IX86_BUILTIN_PMAXUB128,
25806 IX86_BUILTIN_PMINSW128,
25807 IX86_BUILTIN_PMINUB128,
25809 IX86_BUILTIN_PMULUDQ,
25810 IX86_BUILTIN_PMULUDQ128,
25811 IX86_BUILTIN_PMULHUW128,
25812 IX86_BUILTIN_PMULHW128,
25813 IX86_BUILTIN_PMULLW128,
25815 IX86_BUILTIN_PSADBW128,
25816 IX86_BUILTIN_PSHUFHW,
25817 IX86_BUILTIN_PSHUFLW,
25818 IX86_BUILTIN_PSHUFD,
25820 IX86_BUILTIN_PSLLDQI128,
25821 IX86_BUILTIN_PSLLWI128,
25822 IX86_BUILTIN_PSLLDI128,
25823 IX86_BUILTIN_PSLLQI128,
25824 IX86_BUILTIN_PSRAWI128,
25825 IX86_BUILTIN_PSRADI128,
25826 IX86_BUILTIN_PSRLDQI128,
25827 IX86_BUILTIN_PSRLWI128,
25828 IX86_BUILTIN_PSRLDI128,
25829 IX86_BUILTIN_PSRLQI128,
25831 IX86_BUILTIN_PSLLDQ128,
25832 IX86_BUILTIN_PSLLW128,
25833 IX86_BUILTIN_PSLLD128,
25834 IX86_BUILTIN_PSLLQ128,
25835 IX86_BUILTIN_PSRAW128,
25836 IX86_BUILTIN_PSRAD128,
25837 IX86_BUILTIN_PSRLW128,
25838 IX86_BUILTIN_PSRLD128,
25839 IX86_BUILTIN_PSRLQ128,
25841 IX86_BUILTIN_PUNPCKHBW128,
25842 IX86_BUILTIN_PUNPCKHWD128,
25843 IX86_BUILTIN_PUNPCKHDQ128,
25844 IX86_BUILTIN_PUNPCKHQDQ128,
25845 IX86_BUILTIN_PUNPCKLBW128,
25846 IX86_BUILTIN_PUNPCKLWD128,
25847 IX86_BUILTIN_PUNPCKLDQ128,
25848 IX86_BUILTIN_PUNPCKLQDQ128,
25850 IX86_BUILTIN_CLFLUSH,
25851 IX86_BUILTIN_MFENCE,
25852 IX86_BUILTIN_LFENCE,
25853 IX86_BUILTIN_PAUSE,
25855 IX86_BUILTIN_BSRSI,
25856 IX86_BUILTIN_BSRDI,
25857 IX86_BUILTIN_RDPMC,
25858 IX86_BUILTIN_RDTSC,
25859 IX86_BUILTIN_RDTSCP,
25860 IX86_BUILTIN_ROLQI,
25861 IX86_BUILTIN_ROLHI,
25862 IX86_BUILTIN_RORQI,
25863 IX86_BUILTIN_RORHI,
25866 IX86_BUILTIN_ADDSUBPS,
25867 IX86_BUILTIN_HADDPS,
25868 IX86_BUILTIN_HSUBPS,
25869 IX86_BUILTIN_MOVSHDUP,
25870 IX86_BUILTIN_MOVSLDUP,
25871 IX86_BUILTIN_ADDSUBPD,
25872 IX86_BUILTIN_HADDPD,
25873 IX86_BUILTIN_HSUBPD,
25874 IX86_BUILTIN_LDDQU,
25876 IX86_BUILTIN_MONITOR,
25877 IX86_BUILTIN_MWAIT,
25880 IX86_BUILTIN_PHADDW,
25881 IX86_BUILTIN_PHADDD,
25882 IX86_BUILTIN_PHADDSW,
25883 IX86_BUILTIN_PHSUBW,
25884 IX86_BUILTIN_PHSUBD,
25885 IX86_BUILTIN_PHSUBSW,
25886 IX86_BUILTIN_PMADDUBSW,
25887 IX86_BUILTIN_PMULHRSW,
25888 IX86_BUILTIN_PSHUFB,
25889 IX86_BUILTIN_PSIGNB,
25890 IX86_BUILTIN_PSIGNW,
25891 IX86_BUILTIN_PSIGND,
25892 IX86_BUILTIN_PALIGNR,
25893 IX86_BUILTIN_PABSB,
25894 IX86_BUILTIN_PABSW,
25895 IX86_BUILTIN_PABSD,
25897 IX86_BUILTIN_PHADDW128,
25898 IX86_BUILTIN_PHADDD128,
25899 IX86_BUILTIN_PHADDSW128,
25900 IX86_BUILTIN_PHSUBW128,
25901 IX86_BUILTIN_PHSUBD128,
25902 IX86_BUILTIN_PHSUBSW128,
25903 IX86_BUILTIN_PMADDUBSW128,
25904 IX86_BUILTIN_PMULHRSW128,
25905 IX86_BUILTIN_PSHUFB128,
25906 IX86_BUILTIN_PSIGNB128,
25907 IX86_BUILTIN_PSIGNW128,
25908 IX86_BUILTIN_PSIGND128,
25909 IX86_BUILTIN_PALIGNR128,
25910 IX86_BUILTIN_PABSB128,
25911 IX86_BUILTIN_PABSW128,
25912 IX86_BUILTIN_PABSD128,
25914 /* AMDFAM10 - SSE4A New Instructions. */
25915 IX86_BUILTIN_MOVNTSD,
25916 IX86_BUILTIN_MOVNTSS,
25917 IX86_BUILTIN_EXTRQI,
25918 IX86_BUILTIN_EXTRQ,
25919 IX86_BUILTIN_INSERTQI,
25920 IX86_BUILTIN_INSERTQ,
25923 IX86_BUILTIN_BLENDPD,
25924 IX86_BUILTIN_BLENDPS,
25925 IX86_BUILTIN_BLENDVPD,
25926 IX86_BUILTIN_BLENDVPS,
25927 IX86_BUILTIN_PBLENDVB128,
25928 IX86_BUILTIN_PBLENDW128,
25933 IX86_BUILTIN_INSERTPS128,
25935 IX86_BUILTIN_MOVNTDQA,
25936 IX86_BUILTIN_MPSADBW128,
25937 IX86_BUILTIN_PACKUSDW128,
25938 IX86_BUILTIN_PCMPEQQ,
25939 IX86_BUILTIN_PHMINPOSUW128,
25941 IX86_BUILTIN_PMAXSB128,
25942 IX86_BUILTIN_PMAXSD128,
25943 IX86_BUILTIN_PMAXUD128,
25944 IX86_BUILTIN_PMAXUW128,
25946 IX86_BUILTIN_PMINSB128,
25947 IX86_BUILTIN_PMINSD128,
25948 IX86_BUILTIN_PMINUD128,
25949 IX86_BUILTIN_PMINUW128,
25951 IX86_BUILTIN_PMOVSXBW128,
25952 IX86_BUILTIN_PMOVSXBD128,
25953 IX86_BUILTIN_PMOVSXBQ128,
25954 IX86_BUILTIN_PMOVSXWD128,
25955 IX86_BUILTIN_PMOVSXWQ128,
25956 IX86_BUILTIN_PMOVSXDQ128,
25958 IX86_BUILTIN_PMOVZXBW128,
25959 IX86_BUILTIN_PMOVZXBD128,
25960 IX86_BUILTIN_PMOVZXBQ128,
25961 IX86_BUILTIN_PMOVZXWD128,
25962 IX86_BUILTIN_PMOVZXWQ128,
25963 IX86_BUILTIN_PMOVZXDQ128,
25965 IX86_BUILTIN_PMULDQ128,
25966 IX86_BUILTIN_PMULLD128,
25968 IX86_BUILTIN_ROUNDSD,
25969 IX86_BUILTIN_ROUNDSS,
25971 IX86_BUILTIN_ROUNDPD,
25972 IX86_BUILTIN_ROUNDPS,
25974 IX86_BUILTIN_FLOORPD,
25975 IX86_BUILTIN_CEILPD,
25976 IX86_BUILTIN_TRUNCPD,
25977 IX86_BUILTIN_RINTPD,
25978 IX86_BUILTIN_ROUNDPD_AZ,
25980 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
25981 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
25982 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
25984 IX86_BUILTIN_FLOORPS,
25985 IX86_BUILTIN_CEILPS,
25986 IX86_BUILTIN_TRUNCPS,
25987 IX86_BUILTIN_RINTPS,
25988 IX86_BUILTIN_ROUNDPS_AZ,
25990 IX86_BUILTIN_FLOORPS_SFIX,
25991 IX86_BUILTIN_CEILPS_SFIX,
25992 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
25994 IX86_BUILTIN_PTESTZ,
25995 IX86_BUILTIN_PTESTC,
25996 IX86_BUILTIN_PTESTNZC,
25998 IX86_BUILTIN_VEC_INIT_V2SI,
25999 IX86_BUILTIN_VEC_INIT_V4HI,
26000 IX86_BUILTIN_VEC_INIT_V8QI,
26001 IX86_BUILTIN_VEC_EXT_V2DF,
26002 IX86_BUILTIN_VEC_EXT_V2DI,
26003 IX86_BUILTIN_VEC_EXT_V4SF,
26004 IX86_BUILTIN_VEC_EXT_V4SI,
26005 IX86_BUILTIN_VEC_EXT_V8HI,
26006 IX86_BUILTIN_VEC_EXT_V2SI,
26007 IX86_BUILTIN_VEC_EXT_V4HI,
26008 IX86_BUILTIN_VEC_EXT_V16QI,
26009 IX86_BUILTIN_VEC_SET_V2DI,
26010 IX86_BUILTIN_VEC_SET_V4SF,
26011 IX86_BUILTIN_VEC_SET_V4SI,
26012 IX86_BUILTIN_VEC_SET_V8HI,
26013 IX86_BUILTIN_VEC_SET_V4HI,
26014 IX86_BUILTIN_VEC_SET_V16QI,
26016 IX86_BUILTIN_VEC_PACK_SFIX,
26017 IX86_BUILTIN_VEC_PACK_SFIX256,
26020 IX86_BUILTIN_CRC32QI,
26021 IX86_BUILTIN_CRC32HI,
26022 IX86_BUILTIN_CRC32SI,
26023 IX86_BUILTIN_CRC32DI,
26025 IX86_BUILTIN_PCMPESTRI128,
26026 IX86_BUILTIN_PCMPESTRM128,
26027 IX86_BUILTIN_PCMPESTRA128,
26028 IX86_BUILTIN_PCMPESTRC128,
26029 IX86_BUILTIN_PCMPESTRO128,
26030 IX86_BUILTIN_PCMPESTRS128,
26031 IX86_BUILTIN_PCMPESTRZ128,
26032 IX86_BUILTIN_PCMPISTRI128,
26033 IX86_BUILTIN_PCMPISTRM128,
26034 IX86_BUILTIN_PCMPISTRA128,
26035 IX86_BUILTIN_PCMPISTRC128,
26036 IX86_BUILTIN_PCMPISTRO128,
26037 IX86_BUILTIN_PCMPISTRS128,
26038 IX86_BUILTIN_PCMPISTRZ128,
26040 IX86_BUILTIN_PCMPGTQ,
26042 /* AES instructions */
26043 IX86_BUILTIN_AESENC128,
26044 IX86_BUILTIN_AESENCLAST128,
26045 IX86_BUILTIN_AESDEC128,
26046 IX86_BUILTIN_AESDECLAST128,
26047 IX86_BUILTIN_AESIMC128,
26048 IX86_BUILTIN_AESKEYGENASSIST128,
26050 /* PCLMUL instruction */
26051 IX86_BUILTIN_PCLMULQDQ128,
26054 IX86_BUILTIN_ADDPD256,
26055 IX86_BUILTIN_ADDPS256,
26056 IX86_BUILTIN_ADDSUBPD256,
26057 IX86_BUILTIN_ADDSUBPS256,
26058 IX86_BUILTIN_ANDPD256,
26059 IX86_BUILTIN_ANDPS256,
26060 IX86_BUILTIN_ANDNPD256,
26061 IX86_BUILTIN_ANDNPS256,
26062 IX86_BUILTIN_BLENDPD256,
26063 IX86_BUILTIN_BLENDPS256,
26064 IX86_BUILTIN_BLENDVPD256,
26065 IX86_BUILTIN_BLENDVPS256,
26066 IX86_BUILTIN_DIVPD256,
26067 IX86_BUILTIN_DIVPS256,
26068 IX86_BUILTIN_DPPS256,
26069 IX86_BUILTIN_HADDPD256,
26070 IX86_BUILTIN_HADDPS256,
26071 IX86_BUILTIN_HSUBPD256,
26072 IX86_BUILTIN_HSUBPS256,
26073 IX86_BUILTIN_MAXPD256,
26074 IX86_BUILTIN_MAXPS256,
26075 IX86_BUILTIN_MINPD256,
26076 IX86_BUILTIN_MINPS256,
26077 IX86_BUILTIN_MULPD256,
26078 IX86_BUILTIN_MULPS256,
26079 IX86_BUILTIN_ORPD256,
26080 IX86_BUILTIN_ORPS256,
26081 IX86_BUILTIN_SHUFPD256,
26082 IX86_BUILTIN_SHUFPS256,
26083 IX86_BUILTIN_SUBPD256,
26084 IX86_BUILTIN_SUBPS256,
26085 IX86_BUILTIN_XORPD256,
26086 IX86_BUILTIN_XORPS256,
26087 IX86_BUILTIN_CMPSD,
26088 IX86_BUILTIN_CMPSS,
26089 IX86_BUILTIN_CMPPD,
26090 IX86_BUILTIN_CMPPS,
26091 IX86_BUILTIN_CMPPD256,
26092 IX86_BUILTIN_CMPPS256,
26093 IX86_BUILTIN_CVTDQ2PD256,
26094 IX86_BUILTIN_CVTDQ2PS256,
26095 IX86_BUILTIN_CVTPD2PS256,
26096 IX86_BUILTIN_CVTPS2DQ256,
26097 IX86_BUILTIN_CVTPS2PD256,
26098 IX86_BUILTIN_CVTTPD2DQ256,
26099 IX86_BUILTIN_CVTPD2DQ256,
26100 IX86_BUILTIN_CVTTPS2DQ256,
26101 IX86_BUILTIN_EXTRACTF128PD256,
26102 IX86_BUILTIN_EXTRACTF128PS256,
26103 IX86_BUILTIN_EXTRACTF128SI256,
26104 IX86_BUILTIN_VZEROALL,
26105 IX86_BUILTIN_VZEROUPPER,
26106 IX86_BUILTIN_VPERMILVARPD,
26107 IX86_BUILTIN_VPERMILVARPS,
26108 IX86_BUILTIN_VPERMILVARPD256,
26109 IX86_BUILTIN_VPERMILVARPS256,
26110 IX86_BUILTIN_VPERMILPD,
26111 IX86_BUILTIN_VPERMILPS,
26112 IX86_BUILTIN_VPERMILPD256,
26113 IX86_BUILTIN_VPERMILPS256,
26114 IX86_BUILTIN_VPERMIL2PD,
26115 IX86_BUILTIN_VPERMIL2PS,
26116 IX86_BUILTIN_VPERMIL2PD256,
26117 IX86_BUILTIN_VPERMIL2PS256,
26118 IX86_BUILTIN_VPERM2F128PD256,
26119 IX86_BUILTIN_VPERM2F128PS256,
26120 IX86_BUILTIN_VPERM2F128SI256,
26121 IX86_BUILTIN_VBROADCASTSS,
26122 IX86_BUILTIN_VBROADCASTSD256,
26123 IX86_BUILTIN_VBROADCASTSS256,
26124 IX86_BUILTIN_VBROADCASTPD256,
26125 IX86_BUILTIN_VBROADCASTPS256,
26126 IX86_BUILTIN_VINSERTF128PD256,
26127 IX86_BUILTIN_VINSERTF128PS256,
26128 IX86_BUILTIN_VINSERTF128SI256,
26129 IX86_BUILTIN_LOADUPD256,
26130 IX86_BUILTIN_LOADUPS256,
26131 IX86_BUILTIN_STOREUPD256,
26132 IX86_BUILTIN_STOREUPS256,
26133 IX86_BUILTIN_LDDQU256,
26134 IX86_BUILTIN_MOVNTDQ256,
26135 IX86_BUILTIN_MOVNTPD256,
26136 IX86_BUILTIN_MOVNTPS256,
26137 IX86_BUILTIN_LOADDQU256,
26138 IX86_BUILTIN_STOREDQU256,
26139 IX86_BUILTIN_MASKLOADPD,
26140 IX86_BUILTIN_MASKLOADPS,
26141 IX86_BUILTIN_MASKSTOREPD,
26142 IX86_BUILTIN_MASKSTOREPS,
26143 IX86_BUILTIN_MASKLOADPD256,
26144 IX86_BUILTIN_MASKLOADPS256,
26145 IX86_BUILTIN_MASKSTOREPD256,
26146 IX86_BUILTIN_MASKSTOREPS256,
26147 IX86_BUILTIN_MOVSHDUP256,
26148 IX86_BUILTIN_MOVSLDUP256,
26149 IX86_BUILTIN_MOVDDUP256,
26151 IX86_BUILTIN_SQRTPD256,
26152 IX86_BUILTIN_SQRTPS256,
26153 IX86_BUILTIN_SQRTPS_NR256,
26154 IX86_BUILTIN_RSQRTPS256,
26155 IX86_BUILTIN_RSQRTPS_NR256,
26157 IX86_BUILTIN_RCPPS256,
26159 IX86_BUILTIN_ROUNDPD256,
26160 IX86_BUILTIN_ROUNDPS256,
26162 IX86_BUILTIN_FLOORPD256,
26163 IX86_BUILTIN_CEILPD256,
26164 IX86_BUILTIN_TRUNCPD256,
26165 IX86_BUILTIN_RINTPD256,
26166 IX86_BUILTIN_ROUNDPD_AZ256,
26168 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
26169 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
26170 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
26172 IX86_BUILTIN_FLOORPS256,
26173 IX86_BUILTIN_CEILPS256,
26174 IX86_BUILTIN_TRUNCPS256,
26175 IX86_BUILTIN_RINTPS256,
26176 IX86_BUILTIN_ROUNDPS_AZ256,
26178 IX86_BUILTIN_FLOORPS_SFIX256,
26179 IX86_BUILTIN_CEILPS_SFIX256,
26180 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
26182 IX86_BUILTIN_UNPCKHPD256,
26183 IX86_BUILTIN_UNPCKLPD256,
26184 IX86_BUILTIN_UNPCKHPS256,
26185 IX86_BUILTIN_UNPCKLPS256,
26187 IX86_BUILTIN_SI256_SI,
26188 IX86_BUILTIN_PS256_PS,
26189 IX86_BUILTIN_PD256_PD,
26190 IX86_BUILTIN_SI_SI256,
26191 IX86_BUILTIN_PS_PS256,
26192 IX86_BUILTIN_PD_PD256,
26194 IX86_BUILTIN_VTESTZPD,
26195 IX86_BUILTIN_VTESTCPD,
26196 IX86_BUILTIN_VTESTNZCPD,
26197 IX86_BUILTIN_VTESTZPS,
26198 IX86_BUILTIN_VTESTCPS,
26199 IX86_BUILTIN_VTESTNZCPS,
26200 IX86_BUILTIN_VTESTZPD256,
26201 IX86_BUILTIN_VTESTCPD256,
26202 IX86_BUILTIN_VTESTNZCPD256,
26203 IX86_BUILTIN_VTESTZPS256,
26204 IX86_BUILTIN_VTESTCPS256,
26205 IX86_BUILTIN_VTESTNZCPS256,
26206 IX86_BUILTIN_PTESTZ256,
26207 IX86_BUILTIN_PTESTC256,
26208 IX86_BUILTIN_PTESTNZC256,
26210 IX86_BUILTIN_MOVMSKPD256,
26211 IX86_BUILTIN_MOVMSKPS256,
26214 IX86_BUILTIN_MPSADBW256,
26215 IX86_BUILTIN_PABSB256,
26216 IX86_BUILTIN_PABSW256,
26217 IX86_BUILTIN_PABSD256,
26218 IX86_BUILTIN_PACKSSDW256,
26219 IX86_BUILTIN_PACKSSWB256,
26220 IX86_BUILTIN_PACKUSDW256,
26221 IX86_BUILTIN_PACKUSWB256,
26222 IX86_BUILTIN_PADDB256,
26223 IX86_BUILTIN_PADDW256,
26224 IX86_BUILTIN_PADDD256,
26225 IX86_BUILTIN_PADDQ256,
26226 IX86_BUILTIN_PADDSB256,
26227 IX86_BUILTIN_PADDSW256,
26228 IX86_BUILTIN_PADDUSB256,
26229 IX86_BUILTIN_PADDUSW256,
26230 IX86_BUILTIN_PALIGNR256,
26231 IX86_BUILTIN_AND256I,
26232 IX86_BUILTIN_ANDNOT256I,
26233 IX86_BUILTIN_PAVGB256,
26234 IX86_BUILTIN_PAVGW256,
26235 IX86_BUILTIN_PBLENDVB256,
26236 IX86_BUILTIN_PBLENDVW256,
26237 IX86_BUILTIN_PCMPEQB256,
26238 IX86_BUILTIN_PCMPEQW256,
26239 IX86_BUILTIN_PCMPEQD256,
26240 IX86_BUILTIN_PCMPEQQ256,
26241 IX86_BUILTIN_PCMPGTB256,
26242 IX86_BUILTIN_PCMPGTW256,
26243 IX86_BUILTIN_PCMPGTD256,
26244 IX86_BUILTIN_PCMPGTQ256,
26245 IX86_BUILTIN_PHADDW256,
26246 IX86_BUILTIN_PHADDD256,
26247 IX86_BUILTIN_PHADDSW256,
26248 IX86_BUILTIN_PHSUBW256,
26249 IX86_BUILTIN_PHSUBD256,
26250 IX86_BUILTIN_PHSUBSW256,
26251 IX86_BUILTIN_PMADDUBSW256,
26252 IX86_BUILTIN_PMADDWD256,
26253 IX86_BUILTIN_PMAXSB256,
26254 IX86_BUILTIN_PMAXSW256,
26255 IX86_BUILTIN_PMAXSD256,
26256 IX86_BUILTIN_PMAXUB256,
26257 IX86_BUILTIN_PMAXUW256,
26258 IX86_BUILTIN_PMAXUD256,
26259 IX86_BUILTIN_PMINSB256,
26260 IX86_BUILTIN_PMINSW256,
26261 IX86_BUILTIN_PMINSD256,
26262 IX86_BUILTIN_PMINUB256,
26263 IX86_BUILTIN_PMINUW256,
26264 IX86_BUILTIN_PMINUD256,
26265 IX86_BUILTIN_PMOVMSKB256,
26266 IX86_BUILTIN_PMOVSXBW256,
26267 IX86_BUILTIN_PMOVSXBD256,
26268 IX86_BUILTIN_PMOVSXBQ256,
26269 IX86_BUILTIN_PMOVSXWD256,
26270 IX86_BUILTIN_PMOVSXWQ256,
26271 IX86_BUILTIN_PMOVSXDQ256,
26272 IX86_BUILTIN_PMOVZXBW256,
26273 IX86_BUILTIN_PMOVZXBD256,
26274 IX86_BUILTIN_PMOVZXBQ256,
26275 IX86_BUILTIN_PMOVZXWD256,
26276 IX86_BUILTIN_PMOVZXWQ256,
26277 IX86_BUILTIN_PMOVZXDQ256,
26278 IX86_BUILTIN_PMULDQ256,
26279 IX86_BUILTIN_PMULHRSW256,
26280 IX86_BUILTIN_PMULHUW256,
26281 IX86_BUILTIN_PMULHW256,
26282 IX86_BUILTIN_PMULLW256,
26283 IX86_BUILTIN_PMULLD256,
26284 IX86_BUILTIN_PMULUDQ256,
26285 IX86_BUILTIN_POR256,
26286 IX86_BUILTIN_PSADBW256,
26287 IX86_BUILTIN_PSHUFB256,
26288 IX86_BUILTIN_PSHUFD256,
26289 IX86_BUILTIN_PSHUFHW256,
26290 IX86_BUILTIN_PSHUFLW256,
26291 IX86_BUILTIN_PSIGNB256,
26292 IX86_BUILTIN_PSIGNW256,
26293 IX86_BUILTIN_PSIGND256,
26294 IX86_BUILTIN_PSLLDQI256,
26295 IX86_BUILTIN_PSLLWI256,
26296 IX86_BUILTIN_PSLLW256,
26297 IX86_BUILTIN_PSLLDI256,
26298 IX86_BUILTIN_PSLLD256,
26299 IX86_BUILTIN_PSLLQI256,
26300 IX86_BUILTIN_PSLLQ256,
26301 IX86_BUILTIN_PSRAWI256,
26302 IX86_BUILTIN_PSRAW256,
26303 IX86_BUILTIN_PSRADI256,
26304 IX86_BUILTIN_PSRAD256,
26305 IX86_BUILTIN_PSRLDQI256,
26306 IX86_BUILTIN_PSRLWI256,
26307 IX86_BUILTIN_PSRLW256,
26308 IX86_BUILTIN_PSRLDI256,
26309 IX86_BUILTIN_PSRLD256,
26310 IX86_BUILTIN_PSRLQI256,
26311 IX86_BUILTIN_PSRLQ256,
26312 IX86_BUILTIN_PSUBB256,
26313 IX86_BUILTIN_PSUBW256,
26314 IX86_BUILTIN_PSUBD256,
26315 IX86_BUILTIN_PSUBQ256,
26316 IX86_BUILTIN_PSUBSB256,
26317 IX86_BUILTIN_PSUBSW256,
26318 IX86_BUILTIN_PSUBUSB256,
26319 IX86_BUILTIN_PSUBUSW256,
26320 IX86_BUILTIN_PUNPCKHBW256,
26321 IX86_BUILTIN_PUNPCKHWD256,
26322 IX86_BUILTIN_PUNPCKHDQ256,
26323 IX86_BUILTIN_PUNPCKHQDQ256,
26324 IX86_BUILTIN_PUNPCKLBW256,
26325 IX86_BUILTIN_PUNPCKLWD256,
26326 IX86_BUILTIN_PUNPCKLDQ256,
26327 IX86_BUILTIN_PUNPCKLQDQ256,
26328 IX86_BUILTIN_PXOR256,
26329 IX86_BUILTIN_MOVNTDQA256,
26330 IX86_BUILTIN_VBROADCASTSS_PS,
26331 IX86_BUILTIN_VBROADCASTSS_PS256,
26332 IX86_BUILTIN_VBROADCASTSD_PD256,
26333 IX86_BUILTIN_VBROADCASTSI256,
26334 IX86_BUILTIN_PBLENDD256,
26335 IX86_BUILTIN_PBLENDD128,
26336 IX86_BUILTIN_PBROADCASTB256,
26337 IX86_BUILTIN_PBROADCASTW256,
26338 IX86_BUILTIN_PBROADCASTD256,
26339 IX86_BUILTIN_PBROADCASTQ256,
26340 IX86_BUILTIN_PBROADCASTB128,
26341 IX86_BUILTIN_PBROADCASTW128,
26342 IX86_BUILTIN_PBROADCASTD128,
26343 IX86_BUILTIN_PBROADCASTQ128,
26344 IX86_BUILTIN_VPERMVARSI256,
26345 IX86_BUILTIN_VPERMDF256,
26346 IX86_BUILTIN_VPERMVARSF256,
26347 IX86_BUILTIN_VPERMDI256,
26348 IX86_BUILTIN_VPERMTI256,
26349 IX86_BUILTIN_VEXTRACT128I256,
26350 IX86_BUILTIN_VINSERT128I256,
26351 IX86_BUILTIN_MASKLOADD,
26352 IX86_BUILTIN_MASKLOADQ,
26353 IX86_BUILTIN_MASKLOADD256,
26354 IX86_BUILTIN_MASKLOADQ256,
26355 IX86_BUILTIN_MASKSTORED,
26356 IX86_BUILTIN_MASKSTOREQ,
26357 IX86_BUILTIN_MASKSTORED256,
26358 IX86_BUILTIN_MASKSTOREQ256,
26359 IX86_BUILTIN_PSLLVV4DI,
26360 IX86_BUILTIN_PSLLVV2DI,
26361 IX86_BUILTIN_PSLLVV8SI,
26362 IX86_BUILTIN_PSLLVV4SI,
26363 IX86_BUILTIN_PSRAVV8SI,
26364 IX86_BUILTIN_PSRAVV4SI,
26365 IX86_BUILTIN_PSRLVV4DI,
26366 IX86_BUILTIN_PSRLVV2DI,
26367 IX86_BUILTIN_PSRLVV8SI,
26368 IX86_BUILTIN_PSRLVV4SI,
26370 IX86_BUILTIN_GATHERSIV2DF,
26371 IX86_BUILTIN_GATHERSIV4DF,
26372 IX86_BUILTIN_GATHERDIV2DF,
26373 IX86_BUILTIN_GATHERDIV4DF,
26374 IX86_BUILTIN_GATHERSIV4SF,
26375 IX86_BUILTIN_GATHERSIV8SF,
26376 IX86_BUILTIN_GATHERDIV4SF,
26377 IX86_BUILTIN_GATHERDIV8SF,
26378 IX86_BUILTIN_GATHERSIV2DI,
26379 IX86_BUILTIN_GATHERSIV4DI,
26380 IX86_BUILTIN_GATHERDIV2DI,
26381 IX86_BUILTIN_GATHERDIV4DI,
26382 IX86_BUILTIN_GATHERSIV4SI,
26383 IX86_BUILTIN_GATHERSIV8SI,
26384 IX86_BUILTIN_GATHERDIV4SI,
26385 IX86_BUILTIN_GATHERDIV8SI,
26387 /* Alternate 4-element gather for the vectorizer where
26388 all operands are 32-byte wide. */
26389 IX86_BUILTIN_GATHERALTSIV4DF,
26390 IX86_BUILTIN_GATHERALTDIV8SF,
26391 IX86_BUILTIN_GATHERALTSIV4DI,
26392 IX86_BUILTIN_GATHERALTDIV8SI,
26394 /* TFmode support builtins. */
26395 IX86_BUILTIN_INFQ,
26396 IX86_BUILTIN_HUGE_VALQ,
26397 IX86_BUILTIN_FABSQ,
26398 IX86_BUILTIN_COPYSIGNQ,
26400 /* Vectorizer support builtins. */
26401 IX86_BUILTIN_CPYSGNPS,
26402 IX86_BUILTIN_CPYSGNPD,
26403 IX86_BUILTIN_CPYSGNPS256,
26404 IX86_BUILTIN_CPYSGNPD256,
26406 /* FMA4 instructions. */
26407 IX86_BUILTIN_VFMADDSS,
26408 IX86_BUILTIN_VFMADDSD,
26409 IX86_BUILTIN_VFMADDPS,
26410 IX86_BUILTIN_VFMADDPD,
26411 IX86_BUILTIN_VFMADDPS256,
26412 IX86_BUILTIN_VFMADDPD256,
26413 IX86_BUILTIN_VFMADDSUBPS,
26414 IX86_BUILTIN_VFMADDSUBPD,
26415 IX86_BUILTIN_VFMADDSUBPS256,
26416 IX86_BUILTIN_VFMADDSUBPD256,
26418 /* FMA3 instructions. */
26419 IX86_BUILTIN_VFMADDSS3,
26420 IX86_BUILTIN_VFMADDSD3,
26422 /* XOP instructions. */
26423 IX86_BUILTIN_VPCMOV,
26424 IX86_BUILTIN_VPCMOV_V2DI,
26425 IX86_BUILTIN_VPCMOV_V4SI,
26426 IX86_BUILTIN_VPCMOV_V8HI,
26427 IX86_BUILTIN_VPCMOV_V16QI,
26428 IX86_BUILTIN_VPCMOV_V4SF,
26429 IX86_BUILTIN_VPCMOV_V2DF,
26430 IX86_BUILTIN_VPCMOV256,
26431 IX86_BUILTIN_VPCMOV_V4DI256,
26432 IX86_BUILTIN_VPCMOV_V8SI256,
26433 IX86_BUILTIN_VPCMOV_V16HI256,
26434 IX86_BUILTIN_VPCMOV_V32QI256,
26435 IX86_BUILTIN_VPCMOV_V8SF256,
26436 IX86_BUILTIN_VPCMOV_V4DF256,
26438 IX86_BUILTIN_VPPERM,
26440 IX86_BUILTIN_VPMACSSWW,
26441 IX86_BUILTIN_VPMACSWW,
26442 IX86_BUILTIN_VPMACSSWD,
26443 IX86_BUILTIN_VPMACSWD,
26444 IX86_BUILTIN_VPMACSSDD,
26445 IX86_BUILTIN_VPMACSDD,
26446 IX86_BUILTIN_VPMACSSDQL,
26447 IX86_BUILTIN_VPMACSSDQH,
26448 IX86_BUILTIN_VPMACSDQL,
26449 IX86_BUILTIN_VPMACSDQH,
26450 IX86_BUILTIN_VPMADCSSWD,
26451 IX86_BUILTIN_VPMADCSWD,
26453 IX86_BUILTIN_VPHADDBW,
26454 IX86_BUILTIN_VPHADDBD,
26455 IX86_BUILTIN_VPHADDBQ,
26456 IX86_BUILTIN_VPHADDWD,
26457 IX86_BUILTIN_VPHADDWQ,
26458 IX86_BUILTIN_VPHADDDQ,
26459 IX86_BUILTIN_VPHADDUBW,
26460 IX86_BUILTIN_VPHADDUBD,
26461 IX86_BUILTIN_VPHADDUBQ,
26462 IX86_BUILTIN_VPHADDUWD,
26463 IX86_BUILTIN_VPHADDUWQ,
26464 IX86_BUILTIN_VPHADDUDQ,
26465 IX86_BUILTIN_VPHSUBBW,
26466 IX86_BUILTIN_VPHSUBWD,
26467 IX86_BUILTIN_VPHSUBDQ,
26469 IX86_BUILTIN_VPROTB,
26470 IX86_BUILTIN_VPROTW,
26471 IX86_BUILTIN_VPROTD,
26472 IX86_BUILTIN_VPROTQ,
26473 IX86_BUILTIN_VPROTB_IMM,
26474 IX86_BUILTIN_VPROTW_IMM,
26475 IX86_BUILTIN_VPROTD_IMM,
26476 IX86_BUILTIN_VPROTQ_IMM,
26478 IX86_BUILTIN_VPSHLB,
26479 IX86_BUILTIN_VPSHLW,
26480 IX86_BUILTIN_VPSHLD,
26481 IX86_BUILTIN_VPSHLQ,
26482 IX86_BUILTIN_VPSHAB,
26483 IX86_BUILTIN_VPSHAW,
26484 IX86_BUILTIN_VPSHAD,
26485 IX86_BUILTIN_VPSHAQ,
26487 IX86_BUILTIN_VFRCZSS,
26488 IX86_BUILTIN_VFRCZSD,
26489 IX86_BUILTIN_VFRCZPS,
26490 IX86_BUILTIN_VFRCZPD,
26491 IX86_BUILTIN_VFRCZPS256,
26492 IX86_BUILTIN_VFRCZPD256,
26494 IX86_BUILTIN_VPCOMEQUB,
26495 IX86_BUILTIN_VPCOMNEUB,
26496 IX86_BUILTIN_VPCOMLTUB,
26497 IX86_BUILTIN_VPCOMLEUB,
26498 IX86_BUILTIN_VPCOMGTUB,
26499 IX86_BUILTIN_VPCOMGEUB,
26500 IX86_BUILTIN_VPCOMFALSEUB,
26501 IX86_BUILTIN_VPCOMTRUEUB,
26503 IX86_BUILTIN_VPCOMEQUW,
26504 IX86_BUILTIN_VPCOMNEUW,
26505 IX86_BUILTIN_VPCOMLTUW,
26506 IX86_BUILTIN_VPCOMLEUW,
26507 IX86_BUILTIN_VPCOMGTUW,
26508 IX86_BUILTIN_VPCOMGEUW,
26509 IX86_BUILTIN_VPCOMFALSEUW,
26510 IX86_BUILTIN_VPCOMTRUEUW,
26512 IX86_BUILTIN_VPCOMEQUD,
26513 IX86_BUILTIN_VPCOMNEUD,
26514 IX86_BUILTIN_VPCOMLTUD,
26515 IX86_BUILTIN_VPCOMLEUD,
26516 IX86_BUILTIN_VPCOMGTUD,
26517 IX86_BUILTIN_VPCOMGEUD,
26518 IX86_BUILTIN_VPCOMFALSEUD,
26519 IX86_BUILTIN_VPCOMTRUEUD,
26521 IX86_BUILTIN_VPCOMEQUQ,
26522 IX86_BUILTIN_VPCOMNEUQ,
26523 IX86_BUILTIN_VPCOMLTUQ,
26524 IX86_BUILTIN_VPCOMLEUQ,
26525 IX86_BUILTIN_VPCOMGTUQ,
26526 IX86_BUILTIN_VPCOMGEUQ,
26527 IX86_BUILTIN_VPCOMFALSEUQ,
26528 IX86_BUILTIN_VPCOMTRUEUQ,
26530 IX86_BUILTIN_VPCOMEQB,
26531 IX86_BUILTIN_VPCOMNEB,
26532 IX86_BUILTIN_VPCOMLTB,
26533 IX86_BUILTIN_VPCOMLEB,
26534 IX86_BUILTIN_VPCOMGTB,
26535 IX86_BUILTIN_VPCOMGEB,
26536 IX86_BUILTIN_VPCOMFALSEB,
26537 IX86_BUILTIN_VPCOMTRUEB,
26539 IX86_BUILTIN_VPCOMEQW,
26540 IX86_BUILTIN_VPCOMNEW,
26541 IX86_BUILTIN_VPCOMLTW,
26542 IX86_BUILTIN_VPCOMLEW,
26543 IX86_BUILTIN_VPCOMGTW,
26544 IX86_BUILTIN_VPCOMGEW,
26545 IX86_BUILTIN_VPCOMFALSEW,
26546 IX86_BUILTIN_VPCOMTRUEW,
26548 IX86_BUILTIN_VPCOMEQD,
26549 IX86_BUILTIN_VPCOMNED,
26550 IX86_BUILTIN_VPCOMLTD,
26551 IX86_BUILTIN_VPCOMLED,
26552 IX86_BUILTIN_VPCOMGTD,
26553 IX86_BUILTIN_VPCOMGED,
26554 IX86_BUILTIN_VPCOMFALSED,
26555 IX86_BUILTIN_VPCOMTRUED,
26557 IX86_BUILTIN_VPCOMEQQ,
26558 IX86_BUILTIN_VPCOMNEQ,
26559 IX86_BUILTIN_VPCOMLTQ,
26560 IX86_BUILTIN_VPCOMLEQ,
26561 IX86_BUILTIN_VPCOMGTQ,
26562 IX86_BUILTIN_VPCOMGEQ,
26563 IX86_BUILTIN_VPCOMFALSEQ,
26564 IX86_BUILTIN_VPCOMTRUEQ,
26566 /* LWP instructions. */
26567 IX86_BUILTIN_LLWPCB,
26568 IX86_BUILTIN_SLWPCB,
26569 IX86_BUILTIN_LWPVAL32,
26570 IX86_BUILTIN_LWPVAL64,
26571 IX86_BUILTIN_LWPINS32,
26572 IX86_BUILTIN_LWPINS64,
26574 IX86_BUILTIN_CLZS,
26576 /* RTM */
26577 IX86_BUILTIN_XBEGIN,
26578 IX86_BUILTIN_XEND,
26579 IX86_BUILTIN_XABORT,
26580 IX86_BUILTIN_XTEST,
26582 /* BMI instructions. */
26583 IX86_BUILTIN_BEXTR32,
26584 IX86_BUILTIN_BEXTR64,
26585 IX86_BUILTIN_CTZS,
26587 /* TBM instructions. */
26588 IX86_BUILTIN_BEXTRI32,
26589 IX86_BUILTIN_BEXTRI64,
26591 /* BMI2 instructions. */
26592 IX86_BUILTIN_BZHI32,
26593 IX86_BUILTIN_BZHI64,
26594 IX86_BUILTIN_PDEP32,
26595 IX86_BUILTIN_PDEP64,
26596 IX86_BUILTIN_PEXT32,
26597 IX86_BUILTIN_PEXT64,
26599 /* ADX instructions. */
26600 IX86_BUILTIN_ADDCARRYX32,
26601 IX86_BUILTIN_ADDCARRYX64,
26603 /* FSGSBASE instructions. */
26604 IX86_BUILTIN_RDFSBASE32,
26605 IX86_BUILTIN_RDFSBASE64,
26606 IX86_BUILTIN_RDGSBASE32,
26607 IX86_BUILTIN_RDGSBASE64,
26608 IX86_BUILTIN_WRFSBASE32,
26609 IX86_BUILTIN_WRFSBASE64,
26610 IX86_BUILTIN_WRGSBASE32,
26611 IX86_BUILTIN_WRGSBASE64,
26613 /* RDRND instructions. */
26614 IX86_BUILTIN_RDRAND16_STEP,
26615 IX86_BUILTIN_RDRAND32_STEP,
26616 IX86_BUILTIN_RDRAND64_STEP,
26618 /* RDSEED instructions. */
26619 IX86_BUILTIN_RDSEED16_STEP,
26620 IX86_BUILTIN_RDSEED32_STEP,
26621 IX86_BUILTIN_RDSEED64_STEP,
26623 /* F16C instructions. */
26624 IX86_BUILTIN_CVTPH2PS,
26625 IX86_BUILTIN_CVTPH2PS256,
26626 IX86_BUILTIN_CVTPS2PH,
26627 IX86_BUILTIN_CVTPS2PH256,
26629 /* CFString built-in for Darwin. */
26630 IX86_BUILTIN_CFSTRING,
26632 /* Builtins to get CPU type and supported features. */
26633 IX86_BUILTIN_CPU_INIT,
26634 IX86_BUILTIN_CPU_IS,
26635 IX86_BUILTIN_CPU_SUPPORTS,
26637 IX86_BUILTIN_MAX
26638 };
26640 /* Table for the ix86 builtin decls. */
26641 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
26643 /* Table of all of the builtin functions that are possible with different ISAs
26644 but are waiting to be built until a function is declared to use that
26645 ISA. */
26646 struct builtin_isa {
26647 const char *name; /* function name */
26648 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
26649 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
26650 bool const_p; /* true if the declaration is constant */
26651 bool set_and_not_built_p;
26652 };
26654 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
26657 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26658 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26659 function decl in the ix86_builtins array. Returns the function decl,
26660 or NULL_TREE if the builtin was not added.
26662 If the front end has a special hook for builtin functions, delay adding
26663 builtin functions that aren't in the current ISA until the ISA is changed
26664 with function specific optimization. Doing so can save about 300K for the
26665 default compiler. When the builtin is expanded, check at that time whether
26666 it is valid.
26668 If the front end doesn't have a special hook, record all builtins, even if
26669 they aren't in the current ISA, in case the user uses function specific
26670 options for a different ISA, so that we don't get scope
26671 errors if a builtin is added in the middle of a function scope. */
26673 static inline tree
26674 def_builtin (HOST_WIDE_INT mask, const char *name,
26675 enum ix86_builtin_func_type tcode,
26676 enum ix86_builtins code)
26677 {
26678 tree decl = NULL_TREE;
26680 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
26681 {
26682 ix86_builtins_isa[(int) code].isa = mask;
26684 mask &= ~OPTION_MASK_ISA_64BIT;
26685 if (mask == 0
26686 || (mask & ix86_isa_flags) != 0
26687 || (lang_hooks.builtin_function
26688 == lang_hooks.builtin_function_ext_scope))
26690 {
26691 tree type = ix86_get_builtin_func_type (tcode);
26692 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
26693 NULL, NULL_TREE);
26694 ix86_builtins[(int) code] = decl;
26695 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
26696 }
26697 else
26698 {
26699 ix86_builtins[(int) code] = NULL_TREE;
26700 ix86_builtins_isa[(int) code].tcode = tcode;
26701 ix86_builtins_isa[(int) code].name = name;
26702 ix86_builtins_isa[(int) code].const_p = false;
26703 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
26704 }
26705 }
26707 return decl;
26708 }
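/* Illustration only -- not a call site in this file; the builtin name and
   the IX86_BUILTIN_EXAMPLE code below are hypothetical:

     tree decl = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                              V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_EXAMPLE);

   If SSE2 is not enabled in ix86_isa_flags and the front end does not use
   the ext_scope hook, decl comes back NULL_TREE and the builtin is only
   recorded in ix86_builtins_isa, to be materialized later by
   ix86_add_new_builtins. */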
26710 /* Like def_builtin, but also marks the function decl "const". */
26712 static inline tree
26713 def_builtin_const (HOST_WIDE_INT mask, const char *name,
26714 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
26715 {
26716 tree decl = def_builtin (mask, name, tcode, code);
26717 if (decl)
26718 TREE_READONLY (decl) = 1;
26719 else
26720 ix86_builtins_isa[(int) code].const_p = true;
26722 return decl;
26723 }
26725 /* Add any new builtin functions for a given ISA that may not have been
26726 declared. This saves a bit of space compared to adding all of the
26727 declarations to the tree, even if we didn't use them. */
26729 static void
26730 ix86_add_new_builtins (HOST_WIDE_INT isa)
26731 {
26732 int i;
26734 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
26735 {
26736 if ((ix86_builtins_isa[i].isa & isa) != 0
26737 && ix86_builtins_isa[i].set_and_not_built_p)
26738 {
26739 tree decl, type;
26741 /* Don't define the builtin again. */
26742 ix86_builtins_isa[i].set_and_not_built_p = false;
26744 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
26745 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
26746 type, i, BUILT_IN_MD, NULL,
26747 NULL_TREE);
26749 ix86_builtins[i] = decl;
26750 if (ix86_builtins_isa[i].const_p)
26751 TREE_READONLY (decl) = 1;
26752 }
26753 }
26754 }
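/* Note: this is the deferred half of def_builtin. When the set of enabled
   ISAs grows (e.g. a target("...") attribute or pragma changes
   ix86_isa_flags), the option-handling code can call this with the newly
   enabled isa_flags so that builtins parked with set_and_not_built_p
   finally receive decls. */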
26756 /* Bits for builtin_description.flag. */
26758 /* Set when we don't support the comparison natively, and should
26759 swap the comparison operands in order to support it. */
26760 #define BUILTIN_DESC_SWAP_OPERANDS 1
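/* For example, SSE has no native "greater than" packed compare, so
   __builtin_ia32_cmpgtps in bdesc_args below is described with the LT
   comparison and a swapped-operand signature (V4SF_FTYPE_V4SF_V4SF_SWAP):
   a > b is emitted as b < a. */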
26762 struct builtin_description
26763 {
26764 const HOST_WIDE_INT mask;
26765 const enum insn_code icode;
26766 const char *const name;
26767 const enum ix86_builtins code;
26768 const enum rtx_code comparison;
26769 const int flag;
26770 };
26772 static const struct builtin_description bdesc_comi[] =
26773 {
26774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
26775 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
26776 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
26777 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
26778 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
26779 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
26780 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
26781 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
26782 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
26783 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
26784 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
26785 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
26786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
26787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
26788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
26789 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
26790 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
26791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
26792 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
26793 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
26794 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
26795 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
26796 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
26797 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
26798 };
26800 static const struct builtin_description bdesc_pcmpestr[] =
26801 {
26802 /* SSE4.2 */
26803 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
26804 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
26805 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
26806 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
26807 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
26808 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
26809 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
26810 };
26812 static const struct builtin_description bdesc_pcmpistr[] =
26813 {
26814 /* SSE4.2 */
26815 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
26816 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
26817 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
26818 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
26819 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
26820 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
26821 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
26822 };
26824 /* Special builtins with variable number of arguments. */
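/* Each entry ties a builtin name to an insn pattern and a signature; the
   first entry below, for instance, exposes __builtin_ia32_rdtsc with the
   UINT64_FTYPE_VOID signature (roughly unsigned long long (void)),
   expanded specially rather than through a named pattern
   (CODE_FOR_nothing). */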
26825 static const struct builtin_description bdesc_special_args[] =
26826 {
26827 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
26828 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
26829 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
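26831 /* MMX */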
26832 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
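26834 /* 3DNow! */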
26835 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
26837 /* FXSR, XSAVE and XSAVEOPT */
26838 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
26839 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
26840 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
26841 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
26842 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
26844 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
26845 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
26846 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
26847 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
26848 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
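26850 /* SSE */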
26851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
26855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26860 /* SSE or 3DNow!A */
26861 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26862 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
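26864 /* SSE2 */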
26865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
26869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
26871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
26872 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
26873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
26874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
26876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
26877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
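26879 /* SSE3 */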
26880 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
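26882 /* SSE4.1 */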
26883 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
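26885 /* SSE4A */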
26886 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26887 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
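26889 /* AVX */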
26890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
26891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
26893 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
26894 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
26895 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
26896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
26897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
26899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
26900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
26901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
26902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
26903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
26904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
26905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
26907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
26908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
26909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
26911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
26912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
26913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
26914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
26915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
26916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
26917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
26918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
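26920 /* AVX2 */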
26921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
26922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
26923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
26924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
26925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
26926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
26927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
26928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
26929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
26931 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
26932 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
26933 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
26934 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
26935 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
26936 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
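26938 /* FSGSBASE */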
26939 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
26940 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
26941 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
26942 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
26943 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
26944 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
26945 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
26946 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
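26948 /* RTM */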
26949 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
26950 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
26951 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
26952 };
26954 /* Builtins with variable number of arguments. */
26955 static const struct builtin_description bdesc_args[] =
26956 {
26957 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
26958 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
26959 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
26960 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
26961 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
26962 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
26963 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
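26965 /* MMX */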
26966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26967 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26969 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26979 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26982 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26983 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26985 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26986 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26988 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26990 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26991 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26992 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26993 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26994 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26995 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26997 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26998 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26999 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27000 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27002 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27004 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
27005 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
27006 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
27008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
27010 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
27011 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
27012 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
27013 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
27014 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
27015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
27017 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
27018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
27019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
27020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
27021 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
27022 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
27024 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
27025 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
27026 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
27027 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
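27029 /* 3DNow! */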
27030 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
27031 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
27032 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
27033 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
27035 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27036 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27037 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27038 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
27039 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
27040 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
27041 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27042 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27043 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27044 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27045 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27046 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27047 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27048 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27049 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
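27051 /* 3DNow!A */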
27052 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
27053 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
27054 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
27055 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
27056 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27057 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
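27059 /* SSE */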
27060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
27061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27062 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27064 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
27067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
27068 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
27069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
27070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
27071 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
27073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
27075 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27076 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27077 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27081 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
27085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
27086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
27087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
27090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
27091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
27092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
27093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
27096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
27097 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
27098 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
27099 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
27100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
27101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
27102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
27103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
27107 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27108 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27112 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27114 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27115 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27117 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27122 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27123 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
27126 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
27127 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
27129 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
27131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
27132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
27133 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
27135 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
27136 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
27138 /* SSE MMX or 3DNow!A */
27139 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27140 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27141 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27143 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27144 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27145 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27146 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27148 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
27149 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
27151 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
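27153 /* SSE2 */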

/* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
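/* The _SWAP suffix in the cmpgt/cmpge rows means the expander exchanges the
   two vector operands before emitting the insn, so the "greater" builtins
   can reuse the LT/LE mask-compare pattern.  Sketch of the equivalence:

     __m128d a, b;
     __builtin_ia32_cmpgtpd (a, b);    computes a > b
     __builtin_ia32_cmpltpd (b, a);    same result: b < a

   which is why CMPGTPD carries the rtx code LT rather than GT.  */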

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
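/* Two signature suffixes recur in the shift rows above.  _COUNT marks the
   shift-count operand: the *i* variants (..._SI_COUNT) take an integer
   count, while the register variants (..._V8HI_COUNT and similar) take the
   count from the low quadword of a vector.  _CONVERT marks rows whose
   C-level type differs from the insn's mode: pslldqi128/psrldqi128 expand
   through the V1TImode whole-register shift patterns, so the operands are
   reinterpreted at expansion time and the count is in bits.  A sketch,
   assuming the wrapper in GCC's emmintrin.h:

     #define _mm_slli_si128(A, N) \
       ((__m128i) __builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))

   i.e. the user-level byte count is scaled to a bit count for the insn.  */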
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

/* SSE3 */
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

/* SSSE3 */
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
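/* The palignr rows use the same _CONVERT scheme: the byte offset the user
   writes arrives at the TImode/DImode pattern as a bit count.  A sketch,
   assuming the wrapper in GCC's tmmintrin.h:

     #define _mm_alignr_epi8(X, Y, N) \
       ((__m128i) __builtin_ia32_palignr128 ((__v2di)(X), (__v2di)(Y), \
                                             (int)(N) * 8))
*/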

/* SSE4.1 */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
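/* In the floor/ceil/trunc/rint rows the enum rtx_code slot does not hold a
   comparison; it is overloaded to carry the rounding-mode immediate
   (ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC, ROUND_MXCSR), which the expander
   passes as the round insn's immediate operand.  So, as a sketch,

     __builtin_ia32_floorpd (x)

   expands as if the user had written

     __builtin_ia32_roundpd (x, ROUND_FLOOR)

   with the mode fixed at build time instead of taken from an argument.  */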
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
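/* For the ptest rows the rtx code selects which CC flag of PTEST is read
   back: EQ tests ZF (ptestz, all masked bits zero), LTU tests CF (ptestc),
   and GTU tests ZF == 0 && CF == 0 (ptestnzc).  GCC's smmintrin.h exposes
   the first one roughly as:

     extern __inline int
     _mm_testz_si128 (__m128i __M, __m128i __V)
     {
       return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
     }
*/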

/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

/* SSE4A */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* AES */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
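/* A zero name field, as in the AES and PCLMUL rows above, appears to mean
   the builtin is not registered from this table: the declaration is made
   elsewhere under its own ISA guard, and the row only supplies the insn
   code and prototype consulted when the builtin is expanded.  */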

/* AVX */
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
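/* The si256_si/ps256_ps/pd256_pd rows and the vec_extract_lo rows back the
   128<->256 cast intrinsics.  A sketch, assuming the wrapper in GCC's
   avxintrin.h:

     extern __inline __m256i
     _mm256_castsi128_si256 (__m128i __A)
     {
       return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
     }

   where the upper 128 bits of the result are left undefined, not zeroed.  */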
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
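
  /* Note: each entry above simply binds one named builtin to one insn
     pattern; the user-visible AVX2 intrinsics are thin wrappers around
     these builtins (e.g. avx2intrin.h implements _mm256_add_epi8 in
     terms of __builtin_ia32_paddb256).  */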

  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
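
/* The scalar BMI/BMI2 builtins above are likewise reached through
   wrapper intrinsics; for instance, bmi2intrin.h defines _pdep_u32 in
   terms of __builtin_ia32_pdep_si.  */
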
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI

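/* Naming scheme for the aliases above: MULTI_ARG_<N>_<ELT> takes N
   vector operands of element type ELT; a trailing "2" marks the 256-bit
   form, and the _IMM, _CMP and _TF variants take an immediate count, a
   comparison code or a pcom true/false code as the extra operand.  */
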
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
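
  /* The packed vfmadd*/vfmaddsub* entries above carry both ISA bits, so
     either -mfma or -mfma4 makes them available; the scalar forms are
     registered separately for each ISA.  */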

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};

/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};

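/* The ISA masks above pick the widest vector set that can carry each
   access size: MMX for the 64-bit _ITM_*M64 entries, SSE for M128 and
   AVX for M256.  The name suffixes follow the libitm ABI naming
   (WaR/WaW = write-after-read/-write; RaR/RaW/RfW = read-after-read,
   read-after-write and read-for-write).  */
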
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}

/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}

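/* TYPE_SIZE is in bits, so e.g. a V4SF vector yields 128 above and
   selects the M128 load/store entry points.  */
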
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}

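/* Note that a TM builtin is registered either when its ISA is currently
   enabled or when the front end registers builtins in extern scope, so
   that target attributes and pragmas can still reach the full set
   later.  */
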
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
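
  /* immintrin.h exposes these through _rdrand16_step/_rdrand32_step (and
     _rdrand64_step on 64-bit targets); each returns nonzero when the
     hardware delivered a random value.  */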
28318 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
28319 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
28320 IX86_BUILTIN_GATHERSIV2DF);
28322 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
28323 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
28324 IX86_BUILTIN_GATHERSIV4DF);
28326 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
28327 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
28328 IX86_BUILTIN_GATHERDIV2DF);
28330 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
28331 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
28332 IX86_BUILTIN_GATHERDIV4DF);
28334 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
28335 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
28336 IX86_BUILTIN_GATHERSIV4SF);
28338 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
28339 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
28340 IX86_BUILTIN_GATHERSIV8SF);
28342 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
28343 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
28344 IX86_BUILTIN_GATHERDIV4SF);
28346 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
28347 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
28348 IX86_BUILTIN_GATHERDIV8SF);
28350 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
28351 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
28352 IX86_BUILTIN_GATHERSIV2DI);
28354 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
28355 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
28356 IX86_BUILTIN_GATHERSIV4DI);
28358 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
28359 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
28360 IX86_BUILTIN_GATHERDIV2DI);
28362 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
28363 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
28364 IX86_BUILTIN_GATHERDIV4DI);
28366 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
28367 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
28368 IX86_BUILTIN_GATHERSIV4SI);
28370 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
28371 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
28372 IX86_BUILTIN_GATHERSIV8SI);
28374 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
28375 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
28376 IX86_BUILTIN_GATHERDIV4SI);
28378 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
28379 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
28380 IX86_BUILTIN_GATHERDIV8SI);
28382 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
28383 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
28384 IX86_BUILTIN_GATHERALTSIV4DF);
28386 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
28387 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
28388 IX86_BUILTIN_GATHERALTDIV8SF);
28390 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
28391 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
28392 IX86_BUILTIN_GATHERALTSIV4DI);
28394 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
28395 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
28396 IX86_BUILTIN_GATHERALTDIV8SI);
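/* Operand sketch for the gather builtins above (illustrative): each
   takes (src, base, index, mask, scale), where SCALE is expected to
   be an immediate, e.g.

     __builtin_ia32_gathersiv4sf (src, base, idx, mask, 4);

   Here src, base, idx and mask are hypothetical names for a V4SF
   source, a const float * base, a V4SI index vector and a V4SF mask,
   matching V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT.  */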
28399 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
28400 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
28402 /* MMX access to the vec_init patterns. */
28403 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
28404 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
28406 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
28407 V4HI_FTYPE_HI_HI_HI_HI,
28408 IX86_BUILTIN_VEC_INIT_V4HI);
28410 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
28411 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
28412 IX86_BUILTIN_VEC_INIT_V8QI);
28414 /* Access to the vec_extract patterns. */
28415 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
28416 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
28417 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
28418 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
28419 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
28420 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
28421 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
28422 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
28423 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
28424 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
28426 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
28427 "__builtin_ia32_vec_ext_v4hi",
28428 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
28430 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
28431 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
28433 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
28434 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
28436 /* Access to the vec_set patterns. */
28437 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
28438 "__builtin_ia32_vec_set_v2di",
28439 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
28441 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
28442 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
28444 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
28445 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
28447 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
28448 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
28450 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
28451 "__builtin_ia32_vec_set_v4hi",
28452 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
28454 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
28455 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
28458 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
28459 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
28460 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
28461 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
28462 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
28463 "__builtin_ia32_rdseed_di_step",
28464 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
28467 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28468 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
28469 def_builtin (OPTION_MASK_ISA_64BIT,
28470 "__builtin_ia32_addcarryx_u64",
28471 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
28472 IX86_BUILTIN_ADDCARRYX64);
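/* Usage sketch (illustrative): the builtin returns the carry-out and
   stores the sum through the last argument, so a two-limb add chains
   as

     unsigned int lo, hi;
     unsigned char c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
     (void) __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);

   a0, a1, b0 and b1 are hypothetical operands.  */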
28474 /* Add FMA4 multi-arg instructions.  */
28475 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
28480 ftype = (enum ix86_builtin_func_type) d->flag;
28481 def_builtin_const (d->mask, d->name, ftype, d->code);
28485 /* This builds the processor_model struct type defined in
28486 libgcc/config/i386/cpuinfo.c.  */
28489 build_processor_model_struct (void)
28491 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
28493 tree field = NULL_TREE, field_chain = NULL_TREE;
28495 tree type = make_node (RECORD_TYPE);
28497 /* The first 3 fields are unsigned int. */
28498 for (i = 0; i < 3; ++i)
28500 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
28501 get_identifier (field_name[i]), unsigned_type_node);
28502 if (field_chain != NULL_TREE)
28503 DECL_CHAIN (field) = field_chain;
28504 field_chain = field;
28507 /* The last field is an array of unsigned integers of size one. */
28508 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
28509 get_identifier (field_name[3]),
28510 build_array_type (unsigned_type_node,
28511 build_index_type (size_one_node)));
28512 if (field_chain != NULL_TREE)
28513 DECL_CHAIN (field) = field_chain;
28514 field_chain = field;
28516 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
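/* For reference, the record built above mirrors this layout from
   libgcc/config/i386/cpuinfo.c (sketch):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */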
28520 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */
28523 make_var_decl (tree type, const char *name)
28527 new_decl = build_decl (UNKNOWN_LOCATION,
28529 get_identifier(name),
28532 DECL_EXTERNAL (new_decl) = 1;
28533 TREE_STATIC (new_decl) = 1;
28534 TREE_PUBLIC (new_decl) = 1;
28535 DECL_INITIAL (new_decl) = 0;
28536 DECL_ARTIFICIAL (new_decl) = 0;
28537 DECL_PRESERVE_P (new_decl) = 1;
28539 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
28540 assemble_variable (new_decl, 0, 0, 0);
28545 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
28546 into a check against __cpu_model, defined in libgcc/config/i386/cpuinfo.c.  */
28549 fold_builtin_cpu (tree fndecl, tree *args)
28552 enum ix86_builtins fn_code = (enum ix86_builtins)
28553 DECL_FUNCTION_CODE (fndecl);
28554 tree param_string_cst = NULL;
28556 /* This is the order of bit-fields in __processor_features in cpuinfo.c.  */
28557 enum processor_features
28573 /* These are the values for vendor types and cpu types and subtypes
28574 in cpuinfo.c.  CPU types and subtypes should have the corresponding
28575 start value subtracted.  */
28576 enum processor_model
28586 M_CPU_SUBTYPE_START,
28587 M_INTEL_COREI7_NEHALEM,
28588 M_INTEL_COREI7_WESTMERE,
28589 M_INTEL_COREI7_SANDYBRIDGE,
28590 M_AMDFAM10H_BARCELONA,
28591 M_AMDFAM10H_SHANGHAI,
28592 M_AMDFAM10H_ISTANBUL,
28593 M_AMDFAM15H_BDVER1,
28597 static struct _arch_names_table
28599 const char *const name;
28600 const enum processor_model model;
28602 const arch_names_table[] =
28605 {"intel", M_INTEL},
28606 {"atom", M_INTEL_ATOM},
28607 {"core2", M_INTEL_CORE2},
28608 {"corei7", M_INTEL_COREI7},
28609 {"nehalem", M_INTEL_COREI7_NEHALEM},
28610 {"westmere", M_INTEL_COREI7_WESTMERE},
28611 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
28612 {"amdfam10h", M_AMDFAM10H},
28613 {"barcelona", M_AMDFAM10H_BARCELONA},
28614 {"shanghai", M_AMDFAM10H_SHANGHAI},
28615 {"istanbul", M_AMDFAM10H_ISTANBUL},
28616 {"amdfam15h", M_AMDFAM15H},
28617 {"bdver1", M_AMDFAM15H_BDVER1},
28618 {"bdver2", M_AMDFAM15H_BDVER2},
28621 static struct _isa_names_table
28623 const char *const name;
28624 const enum processor_features feature;
28626 const isa_names_table[] =
28630 {"popcnt", F_POPCNT},
28634 {"ssse3", F_SSSE3},
28635 {"sse4.1", F_SSE4_1},
28636 {"sse4.2", F_SSE4_2},
28641 static tree __processor_model_type = NULL_TREE;
28642 static tree __cpu_model_var = NULL_TREE;
28644 if (__processor_model_type == NULL_TREE)
28645 __processor_model_type = build_processor_model_struct ();
28647 if (__cpu_model_var == NULL_TREE)
28648 __cpu_model_var = make_var_decl (__processor_model_type,
28651 gcc_assert ((args != NULL) && (*args != NULL));
28653 param_string_cst = *args;
28654 while (param_string_cst
28655 && TREE_CODE (param_string_cst) != STRING_CST)
28657 /* *args must be an expr that can contain other EXPRs leading to a STRING_CST.  */
28659 if (!EXPR_P (param_string_cst))
28661 error ("Parameter to builtin must be a string constant or literal");
28662 return integer_zero_node;
28664 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
28667 gcc_assert (param_string_cst);
28669 if (fn_code == IX86_BUILTIN_CPU_IS)
28673 unsigned int field_val = 0;
28674 unsigned int NUM_ARCH_NAMES
28675 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
28677 for (i = 0; i < NUM_ARCH_NAMES; i++)
28678 if (strcmp (arch_names_table[i].name,
28679 TREE_STRING_POINTER (param_string_cst)) == 0)
28682 if (i == NUM_ARCH_NAMES)
28684 error ("Parameter to builtin not valid: %s",
28685 TREE_STRING_POINTER (param_string_cst));
28686 return integer_zero_node;
28689 field = TYPE_FIELDS (__processor_model_type);
28690 field_val = arch_names_table[i].model;
28692 /* CPU types are stored in the next field. */
28693 if (field_val > M_CPU_TYPE_START
28694 && field_val < M_CPU_SUBTYPE_START)
28696 field = DECL_CHAIN (field);
28697 field_val -= M_CPU_TYPE_START;
28700 /* CPU subtypes are stored in the next field. */
28701 if (field_val > M_CPU_SUBTYPE_START)
28703 field = DECL_CHAIN (DECL_CHAIN (field));
28704 field_val -= M_CPU_SUBTYPE_START;
28707 /* Get the appropriate field in __cpu_model. */
28708 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
28711 /* Check the value. */
28712 return build2 (EQ_EXPR, unsigned_type_node, ref,
28713 build_int_cstu (unsigned_type_node, field_val));
28715 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
28720 unsigned int field_val = 0;
28721 unsigned int NUM_ISA_NAMES
28722 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
28724 for (i = 0; i < NUM_ISA_NAMES; i++)
28725 if (strcmp (isa_names_table[i].name,
28726 TREE_STRING_POINTER (param_string_cst)) == 0)
28729 if (i == NUM_ISA_NAMES)
28731 error ("Parameter to builtin not valid: %s",
28732 TREE_STRING_POINTER (param_string_cst));
28733 return integer_zero_node;
28736 field = TYPE_FIELDS (__processor_model_type);
28737 /* Get the last field, which is __cpu_features. */
28738 while (DECL_CHAIN (field))
28739 field = DECL_CHAIN (field);
28741 /* Get the appropriate field: __cpu_model.__cpu_features.  */
28742 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
28745 /* Access the 0th element of __cpu_features array. */
28746 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
28747 integer_zero_node, NULL_TREE, NULL_TREE);
28749 field_val = (1 << isa_names_table[i].feature);
28750 /* Return __cpu_model.__cpu_features[0] & field_val.  */
28751 return build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
28752 build_int_cstu (unsigned_type_node, field_val));
28754 gcc_unreachable ();
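/* Folding sketch (illustrative): given the tables above,

     __builtin_cpu_is ("corei7")

   folds to a comparison of __cpu_model.__cpu_type against
   M_INTEL_COREI7 - M_CPU_TYPE_START, and

     __builtin_cpu_supports ("sse4.2")

   folds to __cpu_model.__cpu_features[0] & (1 << F_SSE4_2).  */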
28758 ix86_fold_builtin (tree fndecl, int n_args,
28759 tree *args, bool ignore ATTRIBUTE_UNUSED)
28761 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
28763 enum ix86_builtins fn_code = (enum ix86_builtins)
28764 DECL_FUNCTION_CODE (fndecl);
28765 if (fn_code == IX86_BUILTIN_CPU_IS
28766 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
28768 gcc_assert (n_args == 1);
28769 return fold_builtin_cpu (fndecl, args);
28773 #ifdef SUBTARGET_FOLD_BUILTIN
28774 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
28780 /* Make builtins to detect cpu type and features supported. NAME is
28781 the builtin name, CODE is the builtin code, and FTYPE is the function
28782 type of the builtin. */
28785 make_cpu_type_builtin (const char* name, int code,
28786 enum ix86_builtin_func_type ftype, bool is_const)
28791 type = ix86_get_builtin_func_type (ftype);
28792 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28794 gcc_assert (decl != NULL_TREE);
28795 ix86_builtins[(int) code] = decl;
28796 TREE_READONLY (decl) = is_const;
28799 /* Make builtins to get CPU type and features supported. The created
28802 __builtin_cpu_init (), to detect cpu type and features,
28803 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
28804 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
28808 ix86_init_platform_type_builtins (void)
28810 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
28811 INT_FTYPE_VOID, false);
28812 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
28813 INT_FTYPE_PCCHAR, true);
28814 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
28815 INT_FTYPE_PCCHAR, true);
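/* User-side sketch (illustrative) of the three builtins created above:

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
         return 2;
       if (__builtin_cpu_supports ("sse4.2"))
         return 1;
       return 0;
     }

   pick_impl is a hypothetical function name.  */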
28818 /* Internal method for ix86_init_builtins. */
28821 ix86_init_builtins_va_builtins_abi (void)
28823 tree ms_va_ref, sysv_va_ref;
28824 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
28825 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
28826 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
28827 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
28831 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
28832 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
28833 ms_va_ref = build_reference_type (ms_va_list_type_node);
28835 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
28838 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
28839 fnvoid_va_start_ms =
28840 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
28841 fnvoid_va_end_sysv =
28842 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
28843 fnvoid_va_start_sysv =
28844 build_varargs_function_type_list (void_type_node, sysv_va_ref,
28846 fnvoid_va_copy_ms =
28847 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
28849 fnvoid_va_copy_sysv =
28850 build_function_type_list (void_type_node, sysv_va_ref,
28851 sysv_va_ref, NULL_TREE);
28853 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
28854 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
28855 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
28856 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
28857 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
28858 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
28859 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
28860 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
28861 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
28862 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
28863 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
28864 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
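/* Usage sketch (illustrative): a varargs function compiled for the
   non-default ABI uses the matching builtin family, e.g. an ms_abi
   function on a SysV target would contain

     __builtin_ms_va_list ap;
     __builtin_ms_va_start (ap, last_named_arg);
     ...
     __builtin_ms_va_end (ap);

   last_named_arg is a hypothetical parameter name; the va_list type
   name assumes the usual registration of ms_va_list_type_node.  */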
28868 ix86_init_builtin_types (void)
28870 tree float128_type_node, float80_type_node;
28872 /* The __float80 type. */
28873 float80_type_node = long_double_type_node;
28874 if (TYPE_MODE (float80_type_node) != XFmode)
28876 /* The __float80 type. */
28877 float80_type_node = make_node (REAL_TYPE);
28879 TYPE_PRECISION (float80_type_node) = 80;
28880 layout_type (float80_type_node);
28882 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
28884 /* The __float128 type. */
28885 float128_type_node = make_node (REAL_TYPE);
28886 TYPE_PRECISION (float128_type_node) = 128;
28887 layout_type (float128_type_node);
28888 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
28890 /* This macro is built by i386-builtin-types.awk. */
28891 DEFINE_BUILTIN_PRIMITIVE_TYPES;
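/* After this registration, user code can name these types directly
   (sketch):

     __float80 ext = 1.0;    -- 80-bit, XFmode
     __float128 quad = 1.0;  -- 128-bit

   Variable names are illustrative.  */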
28895 ix86_init_builtins (void)
28899 ix86_init_builtin_types ();
28901 /* Builtins to get CPU type and features. */
28902 ix86_init_platform_type_builtins ();
28904 /* TFmode support builtins. */
28905 def_builtin_const (0, "__builtin_infq",
28906 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
28907 def_builtin_const (0, "__builtin_huge_valq",
28908 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
28910 /* We will expand them to normal calls if SSE isn't available, since
28911 they are used by libgcc. */
28912 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
28913 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
28914 BUILT_IN_MD, "__fabstf2", NULL_TREE);
28915 TREE_READONLY (t) = 1;
28916 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
28918 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
28919 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
28920 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
28921 TREE_READONLY (t) = 1;
28922 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
28924 ix86_init_tm_builtins ();
28925 ix86_init_mmx_sse_builtins ();
28928 ix86_init_builtins_va_builtins_abi ();
28930 #ifdef SUBTARGET_INIT_BUILTINS
28931 SUBTARGET_INIT_BUILTINS;
28935 /* Return the ix86 builtin for CODE. */
28938 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
28940 if (code >= IX86_BUILTIN_MAX)
28941 return error_mark_node;
28943 return ix86_builtins[code];
28946 /* Errors in the source file can cause expand_expr to return const0_rtx
28947 where we expect a vector. To avoid crashing, use one of the vector
28948 clear instructions. */
28950 safe_vector_operand (rtx x, enum machine_mode mode)
28952 if (x == const0_rtx)
28953 x = CONST0_RTX (mode);
28957 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
28960 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
28963 tree arg0 = CALL_EXPR_ARG (exp, 0);
28964 tree arg1 = CALL_EXPR_ARG (exp, 1);
28965 rtx op0 = expand_normal (arg0);
28966 rtx op1 = expand_normal (arg1);
28967 enum machine_mode tmode = insn_data[icode].operand[0].mode;
28968 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
28969 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
28971 if (VECTOR_MODE_P (mode0))
28972 op0 = safe_vector_operand (op0, mode0);
28973 if (VECTOR_MODE_P (mode1))
28974 op1 = safe_vector_operand (op1, mode1);
28976 if (optimize || !target
28977 || GET_MODE (target) != tmode
28978 || !insn_data[icode].operand[0].predicate (target, tmode))
28979 target = gen_reg_rtx (tmode);
28981 if (GET_MODE (op1) == SImode && mode1 == TImode)
28983 rtx x = gen_reg_rtx (V4SImode);
28984 emit_insn (gen_sse2_loadd (x, op1));
28985 op1 = gen_lowpart (TImode, x);
28988 if (!insn_data[icode].operand[1].predicate (op0, mode0))
28989 op0 = copy_to_mode_reg (mode0, op0);
28990 if (!insn_data[icode].operand[2].predicate (op1, mode1))
28991 op1 = copy_to_mode_reg (mode1, op1);
28993 pat = GEN_FCN (icode) (target, op0, op1);
29002 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
29005 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
29006 enum ix86_builtin_func_type m_type,
29007 enum rtx_code sub_code)
29012 bool comparison_p = false;
29014 bool last_arg_constant = false;
29015 int num_memory = 0;
29018 enum machine_mode mode;
29021 enum machine_mode tmode = insn_data[icode].operand[0].mode;
29025 case MULTI_ARG_4_DF2_DI_I:
29026 case MULTI_ARG_4_DF2_DI_I1:
29027 case MULTI_ARG_4_SF2_SI_I:
29028 case MULTI_ARG_4_SF2_SI_I1:
29030 last_arg_constant = true;
29033 case MULTI_ARG_3_SF:
29034 case MULTI_ARG_3_DF:
29035 case MULTI_ARG_3_SF2:
29036 case MULTI_ARG_3_DF2:
29037 case MULTI_ARG_3_DI:
29038 case MULTI_ARG_3_SI:
29039 case MULTI_ARG_3_SI_DI:
29040 case MULTI_ARG_3_HI:
29041 case MULTI_ARG_3_HI_SI:
29042 case MULTI_ARG_3_QI:
29043 case MULTI_ARG_3_DI2:
29044 case MULTI_ARG_3_SI2:
29045 case MULTI_ARG_3_HI2:
29046 case MULTI_ARG_3_QI2:
29050 case MULTI_ARG_2_SF:
29051 case MULTI_ARG_2_DF:
29052 case MULTI_ARG_2_DI:
29053 case MULTI_ARG_2_SI:
29054 case MULTI_ARG_2_HI:
29055 case MULTI_ARG_2_QI:
29059 case MULTI_ARG_2_DI_IMM:
29060 case MULTI_ARG_2_SI_IMM:
29061 case MULTI_ARG_2_HI_IMM:
29062 case MULTI_ARG_2_QI_IMM:
29064 last_arg_constant = true;
29067 case MULTI_ARG_1_SF:
29068 case MULTI_ARG_1_DF:
29069 case MULTI_ARG_1_SF2:
29070 case MULTI_ARG_1_DF2:
29071 case MULTI_ARG_1_DI:
29072 case MULTI_ARG_1_SI:
29073 case MULTI_ARG_1_HI:
29074 case MULTI_ARG_1_QI:
29075 case MULTI_ARG_1_SI_DI:
29076 case MULTI_ARG_1_HI_DI:
29077 case MULTI_ARG_1_HI_SI:
29078 case MULTI_ARG_1_QI_DI:
29079 case MULTI_ARG_1_QI_SI:
29080 case MULTI_ARG_1_QI_HI:
29084 case MULTI_ARG_2_DI_CMP:
29085 case MULTI_ARG_2_SI_CMP:
29086 case MULTI_ARG_2_HI_CMP:
29087 case MULTI_ARG_2_QI_CMP:
29089 comparison_p = true;
29092 case MULTI_ARG_2_SF_TF:
29093 case MULTI_ARG_2_DF_TF:
29094 case MULTI_ARG_2_DI_TF:
29095 case MULTI_ARG_2_SI_TF:
29096 case MULTI_ARG_2_HI_TF:
29097 case MULTI_ARG_2_QI_TF:
29103 gcc_unreachable ();
29106 if (optimize || !target
29107 || GET_MODE (target) != tmode
29108 || !insn_data[icode].operand[0].predicate (target, tmode))
29109 target = gen_reg_rtx (tmode);
29111 gcc_assert (nargs <= 4);
29113 for (i = 0; i < nargs; i++)
29115 tree arg = CALL_EXPR_ARG (exp, i);
29116 rtx op = expand_normal (arg);
29117 int adjust = (comparison_p) ? 1 : 0;
29118 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
29120 if (last_arg_constant && i == nargs - 1)
29122 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
29124 enum insn_code new_icode = icode;
29127 case CODE_FOR_xop_vpermil2v2df3:
29128 case CODE_FOR_xop_vpermil2v4sf3:
29129 case CODE_FOR_xop_vpermil2v4df3:
29130 case CODE_FOR_xop_vpermil2v8sf3:
29131 error ("the last argument must be a 2-bit immediate");
29132 return gen_reg_rtx (tmode);
29133 case CODE_FOR_xop_rotlv2di3:
29134 new_icode = CODE_FOR_rotlv2di3;
29136 case CODE_FOR_xop_rotlv4si3:
29137 new_icode = CODE_FOR_rotlv4si3;
29139 case CODE_FOR_xop_rotlv8hi3:
29140 new_icode = CODE_FOR_rotlv8hi3;
29142 case CODE_FOR_xop_rotlv16qi3:
29143 new_icode = CODE_FOR_rotlv16qi3;
29145 if (CONST_INT_P (op))
29147 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
29148 op = GEN_INT (INTVAL (op) & mask);
29149 gcc_checking_assert
29150 (insn_data[icode].operand[i + 1].predicate (op, mode));
29154 gcc_checking_assert
29156 && insn_data[new_icode].operand[0].mode == tmode
29157 && insn_data[new_icode].operand[1].mode == tmode
29158 && insn_data[new_icode].operand[2].mode == mode
29159 && insn_data[new_icode].operand[0].predicate
29160 == insn_data[icode].operand[0].predicate
29161 && insn_data[new_icode].operand[1].predicate
29162 == insn_data[icode].operand[1].predicate);
29168 gcc_unreachable ();
29175 if (VECTOR_MODE_P (mode))
29176 op = safe_vector_operand (op, mode);
29178 /* If we aren't optimizing, only allow one memory operand to be generated.  */
29180 if (memory_operand (op, mode))
29183 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
29186 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
29188 op = force_reg (mode, op);
29192 args[i].mode = mode;
29198 pat = GEN_FCN (icode) (target, args[0].op);
29203 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
29204 GEN_INT ((int)sub_code));
29205 else if (! comparison_p)
29206 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
29209 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
29213 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
29218 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
29222 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
29226 gcc_unreachable ();
29236 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
29237 insns with vec_merge. */
29240 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
29244 tree arg0 = CALL_EXPR_ARG (exp, 0);
29245 rtx op1, op0 = expand_normal (arg0);
29246 enum machine_mode tmode = insn_data[icode].operand[0].mode;
29247 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
29249 if (optimize || !target
29250 || GET_MODE (target) != tmode
29251 || !insn_data[icode].operand[0].predicate (target, tmode))
29252 target = gen_reg_rtx (tmode);
29254 if (VECTOR_MODE_P (mode0))
29255 op0 = safe_vector_operand (op0, mode0);
29257 if ((optimize && !register_operand (op0, mode0))
29258 || !insn_data[icode].operand[1].predicate (op0, mode0))
29259 op0 = copy_to_mode_reg (mode0, op0);
29262 if (!insn_data[icode].operand[2].predicate (op1, mode0))
29263 op1 = copy_to_mode_reg (mode0, op1);
29265 pat = GEN_FCN (icode) (target, op0, op1);
29272 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
29275 ix86_expand_sse_compare (const struct builtin_description *d,
29276 tree exp, rtx target, bool swap)
29279 tree arg0 = CALL_EXPR_ARG (exp, 0);
29280 tree arg1 = CALL_EXPR_ARG (exp, 1);
29281 rtx op0 = expand_normal (arg0);
29282 rtx op1 = expand_normal (arg1);
29284 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
29285 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
29286 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
29287 enum rtx_code comparison = d->comparison;
29289 if (VECTOR_MODE_P (mode0))
29290 op0 = safe_vector_operand (op0, mode0);
29291 if (VECTOR_MODE_P (mode1))
29292 op1 = safe_vector_operand (op1, mode1);
29294 /* Swap operands if we have a comparison that isn't available in hardware.  */
29298 rtx tmp = gen_reg_rtx (mode1);
29299 emit_move_insn (tmp, op1);
29304 if (optimize || !target
29305 || GET_MODE (target) != tmode
29306 || !insn_data[d->icode].operand[0].predicate (target, tmode))
29307 target = gen_reg_rtx (tmode);
29309 if ((optimize && !register_operand (op0, mode0))
29310 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
29311 op0 = copy_to_mode_reg (mode0, op0);
29312 if ((optimize && !register_operand (op1, mode1))
29313 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
29314 op1 = copy_to_mode_reg (mode1, op1);
29316 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
29317 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
29324 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
29327 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
29331 tree arg0 = CALL_EXPR_ARG (exp, 0);
29332 tree arg1 = CALL_EXPR_ARG (exp, 1);
29333 rtx op0 = expand_normal (arg0);
29334 rtx op1 = expand_normal (arg1);
29335 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
29336 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
29337 enum rtx_code comparison = d->comparison;
29339 if (VECTOR_MODE_P (mode0))
29340 op0 = safe_vector_operand (op0, mode0);
29341 if (VECTOR_MODE_P (mode1))
29342 op1 = safe_vector_operand (op1, mode1);
29344 /* Swap operands if we have a comparison that isn't available in hardware.  */
29346 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
29353 target = gen_reg_rtx (SImode);
29354 emit_move_insn (target, const0_rtx);
29355 target = gen_rtx_SUBREG (QImode, target, 0);
29357 if ((optimize && !register_operand (op0, mode0))
29358 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
29359 op0 = copy_to_mode_reg (mode0, op0);
29360 if ((optimize && !register_operand (op1, mode1))
29361 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
29362 op1 = copy_to_mode_reg (mode1, op1);
29364 pat = GEN_FCN (d->icode) (op0, op1);
29368 emit_insn (gen_rtx_SET (VOIDmode,
29369 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
29370 gen_rtx_fmt_ee (comparison, QImode,
29374 return SUBREG_REG (target);
29377 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
29380 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
29384 tree arg0 = CALL_EXPR_ARG (exp, 0);
29385 rtx op1, op0 = expand_normal (arg0);
29386 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
29387 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
29389 if (optimize || target == 0
29390 || GET_MODE (target) != tmode
29391 || !insn_data[d->icode].operand[0].predicate (target, tmode))
29392 target = gen_reg_rtx (tmode);
29394 if (VECTOR_MODE_P (mode0))
29395 op0 = safe_vector_operand (op0, mode0);
29397 if ((optimize && !register_operand (op0, mode0))
29398 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
29399 op0 = copy_to_mode_reg (mode0, op0);
29401 op1 = GEN_INT (d->comparison);
29403 pat = GEN_FCN (d->icode) (target, op0, op1);
29411 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
29412 tree exp, rtx target)
29415 tree arg0 = CALL_EXPR_ARG (exp, 0);
29416 tree arg1 = CALL_EXPR_ARG (exp, 1);
29417 rtx op0 = expand_normal (arg0);
29418 rtx op1 = expand_normal (arg1);
29420 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
29421 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
29422 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
29424 if (optimize || target == 0
29425 || GET_MODE (target) != tmode
29426 || !insn_data[d->icode].operand[0].predicate (target, tmode))
29427 target = gen_reg_rtx (tmode);
29429 op0 = safe_vector_operand (op0, mode0);
29430 op1 = safe_vector_operand (op1, mode1);
29432 if ((optimize && !register_operand (op0, mode0))
29433 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
29434 op0 = copy_to_mode_reg (mode0, op0);
29435 if ((optimize && !register_operand (op1, mode1))
29436 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
29437 op1 = copy_to_mode_reg (mode1, op1);
29439 op2 = GEN_INT (d->comparison);
29441 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
29448 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
29451 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
29455 tree arg0 = CALL_EXPR_ARG (exp, 0);
29456 tree arg1 = CALL_EXPR_ARG (exp, 1);
29457 rtx op0 = expand_normal (arg0);
29458 rtx op1 = expand_normal (arg1);
29459 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
29460 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
29461 enum rtx_code comparison = d->comparison;
29463 if (VECTOR_MODE_P (mode0))
29464 op0 = safe_vector_operand (op0, mode0);
29465 if (VECTOR_MODE_P (mode1))
29466 op1 = safe_vector_operand (op1, mode1);
29468 target = gen_reg_rtx (SImode);
29469 emit_move_insn (target, const0_rtx);
29470 target = gen_rtx_SUBREG (QImode, target, 0);
29472 if ((optimize && !register_operand (op0, mode0))
29473 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
29474 op0 = copy_to_mode_reg (mode0, op0);
29475 if ((optimize && !register_operand (op1, mode1))
29476 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
29477 op1 = copy_to_mode_reg (mode1, op1);
29479 pat = GEN_FCN (d->icode) (op0, op1);
29483 emit_insn (gen_rtx_SET (VOIDmode,
29484 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
29485 gen_rtx_fmt_ee (comparison, QImode,
29489 return SUBREG_REG (target);
29492 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
29495 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
29496 tree exp, rtx target)
29499 tree arg0 = CALL_EXPR_ARG (exp, 0);
29500 tree arg1 = CALL_EXPR_ARG (exp, 1);
29501 tree arg2 = CALL_EXPR_ARG (exp, 2);
29502 tree arg3 = CALL_EXPR_ARG (exp, 3);
29503 tree arg4 = CALL_EXPR_ARG (exp, 4);
29504 rtx scratch0, scratch1;
29505 rtx op0 = expand_normal (arg0);
29506 rtx op1 = expand_normal (arg1);
29507 rtx op2 = expand_normal (arg2);
29508 rtx op3 = expand_normal (arg3);
29509 rtx op4 = expand_normal (arg4);
29510 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
29512 tmode0 = insn_data[d->icode].operand[0].mode;
29513 tmode1 = insn_data[d->icode].operand[1].mode;
29514 modev2 = insn_data[d->icode].operand[2].mode;
29515 modei3 = insn_data[d->icode].operand[3].mode;
29516 modev4 = insn_data[d->icode].operand[4].mode;
29517 modei5 = insn_data[d->icode].operand[5].mode;
29518 modeimm = insn_data[d->icode].operand[6].mode;
29520 if (VECTOR_MODE_P (modev2))
29521 op0 = safe_vector_operand (op0, modev2);
29522 if (VECTOR_MODE_P (modev4))
29523 op2 = safe_vector_operand (op2, modev4);
29525 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
29526 op0 = copy_to_mode_reg (modev2, op0);
29527 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
29528 op1 = copy_to_mode_reg (modei3, op1);
29529 if ((optimize && !register_operand (op2, modev4))
29530 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
29531 op2 = copy_to_mode_reg (modev4, op2);
29532 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
29533 op3 = copy_to_mode_reg (modei5, op3);
29535 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
29537 error ("the fifth argument must be an 8-bit immediate");
29541 if (d->code == IX86_BUILTIN_PCMPESTRI128)
29543 if (optimize || !target
29544 || GET_MODE (target) != tmode0
29545 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
29546 target = gen_reg_rtx (tmode0);
29548 scratch1 = gen_reg_rtx (tmode1);
29550 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
29552 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
29554 if (optimize || !target
29555 || GET_MODE (target) != tmode1
29556 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
29557 target = gen_reg_rtx (tmode1);
29559 scratch0 = gen_reg_rtx (tmode0);
29561 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
29565 gcc_assert (d->flag);
29567 scratch0 = gen_reg_rtx (tmode0);
29568 scratch1 = gen_reg_rtx (tmode1);
29570 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
29580 target = gen_reg_rtx (SImode);
29581 emit_move_insn (target, const0_rtx);
29582 target = gen_rtx_SUBREG (QImode, target, 0);
29585 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
29586 gen_rtx_fmt_ee (EQ, QImode,
29587 gen_rtx_REG ((enum machine_mode) d->flag,
29590 return SUBREG_REG (target);
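/* Usage sketch (illustrative): __builtin_ia32_pcmpestri128 takes two
   V16QI operands with explicit lengths plus an 8-bit immediate mode,
   per INT_FTYPE_V16QI_INT_V16QI_INT_INT, e.g.

     int idx = __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x0c);

   Operand names and the immediate value are hypothetical.  */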
29597 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
29600 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
29601 tree exp, rtx target)
29604 tree arg0 = CALL_EXPR_ARG (exp, 0);
29605 tree arg1 = CALL_EXPR_ARG (exp, 1);
29606 tree arg2 = CALL_EXPR_ARG (exp, 2);
29607 rtx scratch0, scratch1;
29608 rtx op0 = expand_normal (arg0);
29609 rtx op1 = expand_normal (arg1);
29610 rtx op2 = expand_normal (arg2);
29611 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
29613 tmode0 = insn_data[d->icode].operand[0].mode;
29614 tmode1 = insn_data[d->icode].operand[1].mode;
29615 modev2 = insn_data[d->icode].operand[2].mode;
29616 modev3 = insn_data[d->icode].operand[3].mode;
29617 modeimm = insn_data[d->icode].operand[4].mode;
29619 if (VECTOR_MODE_P (modev2))
29620 op0 = safe_vector_operand (op0, modev2);
29621 if (VECTOR_MODE_P (modev3))
29622 op1 = safe_vector_operand (op1, modev3);
29624 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
29625 op0 = copy_to_mode_reg (modev2, op0);
29626 if ((optimize && !register_operand (op1, modev3))
29627 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
29628 op1 = copy_to_mode_reg (modev3, op1);
29630 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
29632 error ("the third argument must be an 8-bit immediate");
29636 if (d->code == IX86_BUILTIN_PCMPISTRI128)
29638 if (optimize || !target
29639 || GET_MODE (target) != tmode0
29640 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
29641 target = gen_reg_rtx (tmode0);
29643 scratch1 = gen_reg_rtx (tmode1);
29645 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
29647 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
29649 if (optimize || !target
29650 || GET_MODE (target) != tmode1
29651 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
29652 target = gen_reg_rtx (tmode1);
29654 scratch0 = gen_reg_rtx (tmode0);
29656 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
29660 gcc_assert (d->flag);
29662 scratch0 = gen_reg_rtx (tmode0);
29663 scratch1 = gen_reg_rtx (tmode1);
29665 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
29675 target = gen_reg_rtx (SImode);
29676 emit_move_insn (target, const0_rtx);
29677 target = gen_rtx_SUBREG (QImode, target, 0);
29680 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
29681 gen_rtx_fmt_ee (EQ, QImode,
29682 gen_rtx_REG ((enum machine_mode) d->flag,
29685 return SUBREG_REG (target);
29691 /* Subroutine of ix86_expand_builtin to take care of insns with
29692 variable number of operands. */
29695 ix86_expand_args_builtin (const struct builtin_description *d,
29696 tree exp, rtx target)
29698 rtx pat, real_target;
29699 unsigned int i, nargs;
29700 unsigned int nargs_constant = 0;
29701 int num_memory = 0;
29705 enum machine_mode mode;
29707 bool last_arg_count = false;
29708 enum insn_code icode = d->icode;
29709 const struct insn_data_d *insn_p = &insn_data[icode];
29710 enum machine_mode tmode = insn_p->operand[0].mode;
29711 enum machine_mode rmode = VOIDmode;
29713 enum rtx_code comparison = d->comparison;
29715 switch ((enum ix86_builtin_func_type) d->flag)
29717 case V2DF_FTYPE_V2DF_ROUND:
29718 case V4DF_FTYPE_V4DF_ROUND:
29719 case V4SF_FTYPE_V4SF_ROUND:
29720 case V8SF_FTYPE_V8SF_ROUND:
29721 case V4SI_FTYPE_V4SF_ROUND:
29722 case V8SI_FTYPE_V8SF_ROUND:
29723 return ix86_expand_sse_round (d, exp, target);
29724 case V4SI_FTYPE_V2DF_V2DF_ROUND:
29725 case V8SI_FTYPE_V4DF_V4DF_ROUND:
29726 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
29727 case INT_FTYPE_V8SF_V8SF_PTEST:
29728 case INT_FTYPE_V4DI_V4DI_PTEST:
29729 case INT_FTYPE_V4DF_V4DF_PTEST:
29730 case INT_FTYPE_V4SF_V4SF_PTEST:
29731 case INT_FTYPE_V2DI_V2DI_PTEST:
29732 case INT_FTYPE_V2DF_V2DF_PTEST:
29733 return ix86_expand_sse_ptest (d, exp, target);
29734 case FLOAT128_FTYPE_FLOAT128:
29735 case FLOAT_FTYPE_FLOAT:
29736 case INT_FTYPE_INT:
29737 case UINT64_FTYPE_INT:
29738 case UINT16_FTYPE_UINT16:
29739 case INT64_FTYPE_INT64:
29740 case INT64_FTYPE_V4SF:
29741 case INT64_FTYPE_V2DF:
29742 case INT_FTYPE_V16QI:
29743 case INT_FTYPE_V8QI:
29744 case INT_FTYPE_V8SF:
29745 case INT_FTYPE_V4DF:
29746 case INT_FTYPE_V4SF:
29747 case INT_FTYPE_V2DF:
29748 case INT_FTYPE_V32QI:
29749 case V16QI_FTYPE_V16QI:
29750 case V8SI_FTYPE_V8SF:
29751 case V8SI_FTYPE_V4SI:
29752 case V8HI_FTYPE_V8HI:
29753 case V8HI_FTYPE_V16QI:
29754 case V8QI_FTYPE_V8QI:
29755 case V8SF_FTYPE_V8SF:
29756 case V8SF_FTYPE_V8SI:
29757 case V8SF_FTYPE_V4SF:
29758 case V8SF_FTYPE_V8HI:
29759 case V4SI_FTYPE_V4SI:
29760 case V4SI_FTYPE_V16QI:
29761 case V4SI_FTYPE_V4SF:
29762 case V4SI_FTYPE_V8SI:
29763 case V4SI_FTYPE_V8HI:
29764 case V4SI_FTYPE_V4DF:
29765 case V4SI_FTYPE_V2DF:
29766 case V4HI_FTYPE_V4HI:
29767 case V4DF_FTYPE_V4DF:
29768 case V4DF_FTYPE_V4SI:
29769 case V4DF_FTYPE_V4SF:
29770 case V4DF_FTYPE_V2DF:
29771 case V4SF_FTYPE_V4SF:
29772 case V4SF_FTYPE_V4SI:
29773 case V4SF_FTYPE_V8SF:
29774 case V4SF_FTYPE_V4DF:
29775 case V4SF_FTYPE_V8HI:
29776 case V4SF_FTYPE_V2DF:
29777 case V2DI_FTYPE_V2DI:
29778 case V2DI_FTYPE_V16QI:
29779 case V2DI_FTYPE_V8HI:
29780 case V2DI_FTYPE_V4SI:
29781 case V2DF_FTYPE_V2DF:
29782 case V2DF_FTYPE_V4SI:
29783 case V2DF_FTYPE_V4DF:
29784 case V2DF_FTYPE_V4SF:
29785 case V2DF_FTYPE_V2SI:
29786 case V2SI_FTYPE_V2SI:
29787 case V2SI_FTYPE_V4SF:
29788 case V2SI_FTYPE_V2SF:
29789 case V2SI_FTYPE_V2DF:
29790 case V2SF_FTYPE_V2SF:
29791 case V2SF_FTYPE_V2SI:
29792 case V32QI_FTYPE_V32QI:
29793 case V32QI_FTYPE_V16QI:
29794 case V16HI_FTYPE_V16HI:
29795 case V16HI_FTYPE_V8HI:
29796 case V8SI_FTYPE_V8SI:
29797 case V16HI_FTYPE_V16QI:
29798 case V8SI_FTYPE_V16QI:
29799 case V4DI_FTYPE_V16QI:
29800 case V8SI_FTYPE_V8HI:
29801 case V4DI_FTYPE_V8HI:
29802 case V4DI_FTYPE_V4SI:
29803 case V4DI_FTYPE_V2DI:
29806 case V4SF_FTYPE_V4SF_VEC_MERGE:
29807 case V2DF_FTYPE_V2DF_VEC_MERGE:
29808 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
29809 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
29810 case V16QI_FTYPE_V16QI_V16QI:
29811 case V16QI_FTYPE_V8HI_V8HI:
29812 case V8QI_FTYPE_V8QI_V8QI:
29813 case V8QI_FTYPE_V4HI_V4HI:
29814 case V8HI_FTYPE_V8HI_V8HI:
29815 case V8HI_FTYPE_V16QI_V16QI:
29816 case V8HI_FTYPE_V4SI_V4SI:
29817 case V8SF_FTYPE_V8SF_V8SF:
29818 case V8SF_FTYPE_V8SF_V8SI:
29819 case V4SI_FTYPE_V4SI_V4SI:
29820 case V4SI_FTYPE_V8HI_V8HI:
29821 case V4SI_FTYPE_V4SF_V4SF:
29822 case V4SI_FTYPE_V2DF_V2DF:
29823 case V4HI_FTYPE_V4HI_V4HI:
29824 case V4HI_FTYPE_V8QI_V8QI:
29825 case V4HI_FTYPE_V2SI_V2SI:
29826 case V4DF_FTYPE_V4DF_V4DF:
29827 case V4DF_FTYPE_V4DF_V4DI:
29828 case V4SF_FTYPE_V4SF_V4SF:
29829 case V4SF_FTYPE_V4SF_V4SI:
29830 case V4SF_FTYPE_V4SF_V2SI:
29831 case V4SF_FTYPE_V4SF_V2DF:
29832 case V4SF_FTYPE_V4SF_DI:
29833 case V4SF_FTYPE_V4SF_SI:
29834 case V2DI_FTYPE_V2DI_V2DI:
29835 case V2DI_FTYPE_V16QI_V16QI:
29836 case V2DI_FTYPE_V4SI_V4SI:
29837 case V2UDI_FTYPE_V4USI_V4USI:
29838 case V2DI_FTYPE_V2DI_V16QI:
29839 case V2DI_FTYPE_V2DF_V2DF:
29840 case V2SI_FTYPE_V2SI_V2SI:
29841 case V2SI_FTYPE_V4HI_V4HI:
29842 case V2SI_FTYPE_V2SF_V2SF:
29843 case V2DF_FTYPE_V2DF_V2DF:
29844 case V2DF_FTYPE_V2DF_V4SF:
29845 case V2DF_FTYPE_V2DF_V2DI:
29846 case V2DF_FTYPE_V2DF_DI:
29847 case V2DF_FTYPE_V2DF_SI:
29848 case V2SF_FTYPE_V2SF_V2SF:
29849 case V1DI_FTYPE_V1DI_V1DI:
29850 case V1DI_FTYPE_V8QI_V8QI:
29851 case V1DI_FTYPE_V2SI_V2SI:
29852 case V32QI_FTYPE_V16HI_V16HI:
29853 case V16HI_FTYPE_V8SI_V8SI:
29854 case V32QI_FTYPE_V32QI_V32QI:
29855 case V16HI_FTYPE_V32QI_V32QI:
29856 case V16HI_FTYPE_V16HI_V16HI:
29857 case V8SI_FTYPE_V4DF_V4DF:
29858 case V8SI_FTYPE_V8SI_V8SI:
29859 case V8SI_FTYPE_V16HI_V16HI:
29860 case V4DI_FTYPE_V4DI_V4DI:
29861 case V4DI_FTYPE_V8SI_V8SI:
29862 case V4UDI_FTYPE_V8USI_V8USI:
29863 if (comparison == UNKNOWN)
29864 return ix86_expand_binop_builtin (icode, exp, target);
29867 case V4SF_FTYPE_V4SF_V4SF_SWAP:
29868 case V2DF_FTYPE_V2DF_V2DF_SWAP:
29869 gcc_assert (comparison != UNKNOWN);
29873 case V16HI_FTYPE_V16HI_V8HI_COUNT:
29874 case V16HI_FTYPE_V16HI_SI_COUNT:
29875 case V8SI_FTYPE_V8SI_V4SI_COUNT:
29876 case V8SI_FTYPE_V8SI_SI_COUNT:
29877 case V4DI_FTYPE_V4DI_V2DI_COUNT:
29878 case V4DI_FTYPE_V4DI_INT_COUNT:
29879 case V8HI_FTYPE_V8HI_V8HI_COUNT:
29880 case V8HI_FTYPE_V8HI_SI_COUNT:
29881 case V4SI_FTYPE_V4SI_V4SI_COUNT:
29882 case V4SI_FTYPE_V4SI_SI_COUNT:
29883 case V4HI_FTYPE_V4HI_V4HI_COUNT:
29884 case V4HI_FTYPE_V4HI_SI_COUNT:
29885 case V2DI_FTYPE_V2DI_V2DI_COUNT:
29886 case V2DI_FTYPE_V2DI_SI_COUNT:
29887 case V2SI_FTYPE_V2SI_V2SI_COUNT:
29888 case V2SI_FTYPE_V2SI_SI_COUNT:
29889 case V1DI_FTYPE_V1DI_V1DI_COUNT:
29890 case V1DI_FTYPE_V1DI_SI_COUNT:
29892 last_arg_count = true;
29894 case UINT64_FTYPE_UINT64_UINT64:
29895 case UINT_FTYPE_UINT_UINT:
29896 case UINT_FTYPE_UINT_USHORT:
29897 case UINT_FTYPE_UINT_UCHAR:
29898 case UINT16_FTYPE_UINT16_INT:
29899 case UINT8_FTYPE_UINT8_INT:
29902 case V2DI_FTYPE_V2DI_INT_CONVERT:
29905 nargs_constant = 1;
29907 case V4DI_FTYPE_V4DI_INT_CONVERT:
29910 nargs_constant = 1;
29912 case V8HI_FTYPE_V8HI_INT:
29913 case V8HI_FTYPE_V8SF_INT:
29914 case V8HI_FTYPE_V4SF_INT:
29915 case V8SF_FTYPE_V8SF_INT:
29916 case V4SI_FTYPE_V4SI_INT:
29917 case V4SI_FTYPE_V8SI_INT:
29918 case V4HI_FTYPE_V4HI_INT:
29919 case V4DF_FTYPE_V4DF_INT:
29920 case V4SF_FTYPE_V4SF_INT:
29921 case V4SF_FTYPE_V8SF_INT:
29922 case V2DI_FTYPE_V2DI_INT:
29923 case V2DF_FTYPE_V2DF_INT:
29924 case V2DF_FTYPE_V4DF_INT:
29925 case V16HI_FTYPE_V16HI_INT:
29926 case V8SI_FTYPE_V8SI_INT:
29927 case V4DI_FTYPE_V4DI_INT:
29928 case V2DI_FTYPE_V4DI_INT:
29930 nargs_constant = 1;
29932 case V16QI_FTYPE_V16QI_V16QI_V16QI:
29933 case V8SF_FTYPE_V8SF_V8SF_V8SF:
29934 case V4DF_FTYPE_V4DF_V4DF_V4DF:
29935 case V4SF_FTYPE_V4SF_V4SF_V4SF:
29936 case V2DF_FTYPE_V2DF_V2DF_V2DF:
29937 case V32QI_FTYPE_V32QI_V32QI_V32QI:
29940 case V32QI_FTYPE_V32QI_V32QI_INT:
29941 case V16HI_FTYPE_V16HI_V16HI_INT:
29942 case V16QI_FTYPE_V16QI_V16QI_INT:
29943 case V4DI_FTYPE_V4DI_V4DI_INT:
29944 case V8HI_FTYPE_V8HI_V8HI_INT:
29945 case V8SI_FTYPE_V8SI_V8SI_INT:
29946 case V8SI_FTYPE_V8SI_V4SI_INT:
29947 case V8SF_FTYPE_V8SF_V8SF_INT:
29948 case V8SF_FTYPE_V8SF_V4SF_INT:
29949 case V4SI_FTYPE_V4SI_V4SI_INT:
29950 case V4DF_FTYPE_V4DF_V4DF_INT:
29951 case V4DF_FTYPE_V4DF_V2DF_INT:
29952 case V4SF_FTYPE_V4SF_V4SF_INT:
29953 case V2DI_FTYPE_V2DI_V2DI_INT:
29954 case V4DI_FTYPE_V4DI_V2DI_INT:
29955 case V2DF_FTYPE_V2DF_V2DF_INT:
29957 nargs_constant = 1;
29959 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
29962 nargs_constant = 1;
29964 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
29967 nargs_constant = 1;
29969 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
29972 nargs_constant = 1;
29974 case V2DI_FTYPE_V2DI_UINT_UINT:
29976 nargs_constant = 2;
29978 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
29979 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
29980 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
29981 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
29983 nargs_constant = 1;
29985 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
29987 nargs_constant = 2;
29989 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
29990 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
29994 gcc_unreachable ();
29997 gcc_assert (nargs <= ARRAY_SIZE (args));
29999 if (comparison != UNKNOWN)
30001 gcc_assert (nargs == 2);
30002 return ix86_expand_sse_compare (d, exp, target, swap);
30005 if (rmode == VOIDmode || rmode == tmode)
30009 || GET_MODE (target) != tmode
30010 || !insn_p->operand[0].predicate (target, tmode))
30011 target = gen_reg_rtx (tmode);
30012 real_target = target;
30016 target = gen_reg_rtx (rmode);
30017 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
30020 for (i = 0; i < nargs; i++)
30022 tree arg = CALL_EXPR_ARG (exp, i);
30023 rtx op = expand_normal (arg);
30024 enum machine_mode mode = insn_p->operand[i + 1].mode;
30025 bool match = insn_p->operand[i + 1].predicate (op, mode);
30027 if (last_arg_count && (i + 1) == nargs)
30029 /* SIMD shift insns take either an 8-bit immediate or a
30030 register as the count.  But builtin functions take int as the
30031 count.  If the count doesn't match, we put it in a register.  */
30034 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
30035 if (!insn_p->operand[i + 1].predicate (op, mode))
30036 op = copy_to_reg (op);
30039 else if ((nargs - i) <= nargs_constant)
30044 case CODE_FOR_avx2_inserti128:
30045 case CODE_FOR_avx2_extracti128:
30046 error ("the last argument must be a 1-bit immediate");
30049 case CODE_FOR_sse4_1_roundsd:
30050 case CODE_FOR_sse4_1_roundss:
30052 case CODE_FOR_sse4_1_roundpd:
30053 case CODE_FOR_sse4_1_roundps:
30054 case CODE_FOR_avx_roundpd256:
30055 case CODE_FOR_avx_roundps256:
30057 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
30058 case CODE_FOR_sse4_1_roundps_sfix:
30059 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
30060 case CODE_FOR_avx_roundps_sfix256:
30062 case CODE_FOR_sse4_1_blendps:
30063 case CODE_FOR_avx_blendpd256:
30064 case CODE_FOR_avx_vpermilv4df:
30065 error ("the last argument must be a 4-bit immediate");
30068 case CODE_FOR_sse4_1_blendpd:
30069 case CODE_FOR_avx_vpermilv2df:
30070 case CODE_FOR_xop_vpermil2v2df3:
30071 case CODE_FOR_xop_vpermil2v4sf3:
30072 case CODE_FOR_xop_vpermil2v4df3:
30073 case CODE_FOR_xop_vpermil2v8sf3:
30074 error ("the last argument must be a 2-bit immediate");
30077 case CODE_FOR_avx_vextractf128v4df:
30078 case CODE_FOR_avx_vextractf128v8sf:
30079 case CODE_FOR_avx_vextractf128v8si:
30080 case CODE_FOR_avx_vinsertf128v4df:
30081 case CODE_FOR_avx_vinsertf128v8sf:
30082 case CODE_FOR_avx_vinsertf128v8si:
30083 error ("the last argument must be a 1-bit immediate");
30086 case CODE_FOR_avx_vmcmpv2df3:
30087 case CODE_FOR_avx_vmcmpv4sf3:
30088 case CODE_FOR_avx_cmpv2df3:
30089 case CODE_FOR_avx_cmpv4sf3:
30090 case CODE_FOR_avx_cmpv4df3:
30091 case CODE_FOR_avx_cmpv8sf3:
30092 error ("the last argument must be a 5-bit immediate");
30096 switch (nargs_constant)
30099 if ((nargs - i) == nargs_constant)
30101 error ("the next to last argument must be an 8-bit immediate");
30105 error ("the last argument must be an 8-bit immediate");
30108 gcc_unreachable ();
30115 if (VECTOR_MODE_P (mode))
30116 op = safe_vector_operand (op, mode);
30118 /* If we aren't optimizing, only allow one memory operand to be generated.  */
30120 if (memory_operand (op, mode))
30123 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
30125 if (optimize || !match || num_memory > 1)
30126 op = copy_to_mode_reg (mode, op);
30130 op = copy_to_reg (op);
30131 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
30136 args[i].mode = mode;
30142 pat = GEN_FCN (icode) (real_target, args[0].op);
30145 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
30148 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
30152 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
30153 args[2].op, args[3].op);
30156 gcc_unreachable ();
30166 /* Subroutine of ix86_expand_builtin to take care of special insns
30167 with variable number of operands. */
30170 ix86_expand_special_args_builtin (const struct builtin_description *d,
30171 tree exp, rtx target)
30175 unsigned int i, nargs, arg_adjust, memory;
30179 enum machine_mode mode;
30181 enum insn_code icode = d->icode;
30182 bool last_arg_constant = false;
30183 const struct insn_data_d *insn_p = &insn_data[icode];
30184 enum machine_mode tmode = insn_p->operand[0].mode;
30185 enum { load, store } klass;
30187 switch ((enum ix86_builtin_func_type) d->flag)
30189 case VOID_FTYPE_VOID:
30190 if (icode == CODE_FOR_avx_vzeroupper)
30191 target = GEN_INT (vzeroupper_intrinsic);
30192 emit_insn (GEN_FCN (icode) (target));
30194 case VOID_FTYPE_UINT64:
30195 case VOID_FTYPE_UNSIGNED:
30201 case INT_FTYPE_VOID:
30202 case UINT64_FTYPE_VOID:
30203 case UNSIGNED_FTYPE_VOID:
30208 case UINT64_FTYPE_PUNSIGNED:
30209 case V2DI_FTYPE_PV2DI:
30210 case V4DI_FTYPE_PV4DI:
30211 case V32QI_FTYPE_PCCHAR:
30212 case V16QI_FTYPE_PCCHAR:
30213 case V8SF_FTYPE_PCV4SF:
30214 case V8SF_FTYPE_PCFLOAT:
30215 case V4SF_FTYPE_PCFLOAT:
30216 case V4DF_FTYPE_PCV2DF:
30217 case V4DF_FTYPE_PCDOUBLE:
30218 case V2DF_FTYPE_PCDOUBLE:
30219 case VOID_FTYPE_PVOID:
30224 case VOID_FTYPE_PV2SF_V4SF:
30225 case VOID_FTYPE_PV4DI_V4DI:
30226 case VOID_FTYPE_PV2DI_V2DI:
30227 case VOID_FTYPE_PCHAR_V32QI:
30228 case VOID_FTYPE_PCHAR_V16QI:
30229 case VOID_FTYPE_PFLOAT_V8SF:
30230 case VOID_FTYPE_PFLOAT_V4SF:
30231 case VOID_FTYPE_PDOUBLE_V4DF:
30232 case VOID_FTYPE_PDOUBLE_V2DF:
30233 case VOID_FTYPE_PLONGLONG_LONGLONG:
30234 case VOID_FTYPE_PULONGLONG_ULONGLONG:
30235 case VOID_FTYPE_PINT_INT:
30238 /* Reserve memory operand for target. */
30239 memory = ARRAY_SIZE (args);
30241 case V4SF_FTYPE_V4SF_PCV2SF:
30242 case V2DF_FTYPE_V2DF_PCDOUBLE:
30247 case V8SF_FTYPE_PCV8SF_V8SI:
30248 case V4DF_FTYPE_PCV4DF_V4DI:
30249 case V4SF_FTYPE_PCV4SF_V4SI:
30250 case V2DF_FTYPE_PCV2DF_V2DI:
30251 case V8SI_FTYPE_PCV8SI_V8SI:
30252 case V4DI_FTYPE_PCV4DI_V4DI:
30253 case V4SI_FTYPE_PCV4SI_V4SI:
30254 case V2DI_FTYPE_PCV2DI_V2DI:
30259 case VOID_FTYPE_PV8SF_V8SI_V8SF:
30260 case VOID_FTYPE_PV4DF_V4DI_V4DF:
30261 case VOID_FTYPE_PV4SF_V4SI_V4SF:
30262 case VOID_FTYPE_PV2DF_V2DI_V2DF:
30263 case VOID_FTYPE_PV8SI_V8SI_V8SI:
30264 case VOID_FTYPE_PV4DI_V4DI_V4DI:
30265 case VOID_FTYPE_PV4SI_V4SI_V4SI:
30266 case VOID_FTYPE_PV2DI_V2DI_V2DI:
30269 /* Reserve memory operand for target. */
30270 memory = ARRAY_SIZE (args);
30272 case VOID_FTYPE_UINT_UINT_UINT:
30273 case VOID_FTYPE_UINT64_UINT_UINT:
30274 case UCHAR_FTYPE_UINT_UINT_UINT:
30275 case UCHAR_FTYPE_UINT64_UINT_UINT:
30278 memory = ARRAY_SIZE (args);
30279 last_arg_constant = true;
30282 gcc_unreachable ();
30285 gcc_assert (nargs <= ARRAY_SIZE (args));
30287 if (klass == store)
30289 arg = CALL_EXPR_ARG (exp, 0);
30290 op = expand_normal (arg);
30291 gcc_assert (target == 0);
30294 if (GET_MODE (op) != Pmode)
30295 op = convert_to_mode (Pmode, op, 1);
30296 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
30299 target = force_reg (tmode, op);
30307 || !register_operand (target, tmode)
30308 || GET_MODE (target) != tmode)
30309 target = gen_reg_rtx (tmode);
30312 for (i = 0; i < nargs; i++)
30314 enum machine_mode mode = insn_p->operand[i + 1].mode;
30317 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
30318 op = expand_normal (arg);
30319 match = insn_p->operand[i + 1].predicate (op, mode);
30321 if (last_arg_constant && (i + 1) == nargs)
30325 if (icode == CODE_FOR_lwp_lwpvalsi3
30326 || icode == CODE_FOR_lwp_lwpinssi3
30327 || icode == CODE_FOR_lwp_lwpvaldi3
30328 || icode == CODE_FOR_lwp_lwpinsdi3)
30329 error ("the last argument must be a 32-bit immediate");
30331 error ("the last argument must be an 8-bit immediate");
30339 /* This must be the memory operand. */
30340 if (GET_MODE (op) != Pmode)
30341 op = convert_to_mode (Pmode, op, 1);
30342 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
30343 gcc_assert (GET_MODE (op) == mode
30344 || GET_MODE (op) == VOIDmode);
30348 /* This must be a register.  */
30349 if (VECTOR_MODE_P (mode))
30350 op = safe_vector_operand (op, mode);
30352 gcc_assert (GET_MODE (op) == mode
30353 || GET_MODE (op) == VOIDmode);
30354 op = copy_to_mode_reg (mode, op);
30359 args[i].mode = mode;
30365 pat = GEN_FCN (icode) (target);
30368 pat = GEN_FCN (icode) (target, args[0].op);
30371 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
30374 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
30377 gcc_unreachable ();
30383 return klass == store ? 0 : target;
30386 /* Return the integer constant in ARG. Constrain it to be in the range
30387 of the subparts of VEC_TYPE; issue an error if not. */
30390 get_element_number (tree vec_type, tree arg)
30392 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
30394 if (!host_integerp (arg, 1)
30395 || (elt = tree_low_cst (arg, 1), elt > max))
30397 error ("selector must be an integer constant in the range 0..%wi", max);
30404 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
30405 ix86_expand_vector_init. We DO have language-level syntax for this, in
30406 the form of (type){ init-list }. Except that since we can't place emms
30407 instructions from inside the compiler, we can't allow the use of MMX
30408 registers unless the user explicitly asks for it. So we do *not* define
30409 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
30410 we have builtins invoked by mmintrin.h that give us license to emit
30411 these sorts of instructions. */
30414 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
30416 enum machine_mode tmode = TYPE_MODE (type);
30417 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
30418 int i, n_elt = GET_MODE_NUNITS (tmode);
30419 rtvec v = rtvec_alloc (n_elt);
30421 gcc_assert (VECTOR_MODE_P (tmode));
30422 gcc_assert (call_expr_nargs (exp) == n_elt);
30424 for (i = 0; i < n_elt; ++i)
30426 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
30427 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
30430 if (!target || !register_operand (target, tmode))
30431 target = gen_reg_rtx (tmode);
30433 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
30437 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
30438 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
30439 had a language-level syntax for referencing vector elements. */
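/* Example of the mapping (a sketch; the intrinsic wrapper lives in
   GCC's emmintrin.h):

     #include <emmintrin.h>
     int x = _mm_extract_epi16 (v, 3);

   is implemented via __builtin_ia32_vec_ext_v8hi and is expanded by
   ix86_expand_vec_ext_builtin below.  */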
30442 ix86_expand_vec_ext_builtin (tree exp, rtx target)
30444 enum machine_mode tmode, mode0;
30449 arg0 = CALL_EXPR_ARG (exp, 0);
30450 arg1 = CALL_EXPR_ARG (exp, 1);
30452 op0 = expand_normal (arg0);
30453 elt = get_element_number (TREE_TYPE (arg0), arg1);
30455 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
30456 mode0 = TYPE_MODE (TREE_TYPE (arg0));
30457 gcc_assert (VECTOR_MODE_P (mode0));
30459 op0 = force_reg (mode0, op0);
30461 if (optimize || !target || !register_operand (target, tmode))
30462 target = gen_reg_rtx (tmode);
30464 ix86_expand_vector_extract (true, target, op0, elt);
30469 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
30470 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
30471 a language-level syntax for referencing vector elements. */
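/* Example of the mapping (a sketch; the intrinsic wrapper lives in
   GCC's emmintrin.h):

     __m128i r = _mm_insert_epi16 (v, 42, 3);

   goes through __builtin_ia32_vec_set_v8hi and is expanded here; note
   that the source operand is copied rather than modified in place.  */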
30474 ix86_expand_vec_set_builtin (tree exp)
30476 enum machine_mode tmode, mode1;
30477 tree arg0, arg1, arg2;
30479 rtx op0, op1, target;
30481 arg0 = CALL_EXPR_ARG (exp, 0);
30482 arg1 = CALL_EXPR_ARG (exp, 1);
30483 arg2 = CALL_EXPR_ARG (exp, 2);
30485 tmode = TYPE_MODE (TREE_TYPE (arg0));
30486 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
30487 gcc_assert (VECTOR_MODE_P (tmode));
30489 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
30490 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
30491 elt = get_element_number (TREE_TYPE (arg0), arg2);
30493 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
30494 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
30496 op0 = force_reg (tmode, op0);
30497 op1 = force_reg (mode1, op1);
30499 /* OP0 is the source of these builtin functions and shouldn't be
30500 modified. Create a copy, use it and return it as target. */
30501 target = gen_reg_rtx (tmode);
30502 emit_move_insn (target, op0);
30503 ix86_expand_vector_set (true, target, op1, elt);
30508 /* Expand an expression EXP that calls a built-in function,
30509 with result going to TARGET if that's convenient
30510 (and in mode MODE if that's convenient).
30511 SUBTARGET may be used as the target for computing one of EXP's operands.
30512 IGNORE is nonzero if the value is to be ignored. */
30515 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
30516 enum machine_mode mode ATTRIBUTE_UNUSED,
30517 int ignore ATTRIBUTE_UNUSED)
30519 const struct builtin_description *d;
30521 enum insn_code icode;
30522 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
30523 tree arg0, arg1, arg2, arg3, arg4;
30524 rtx op0, op1, op2, op3, op4, pat, insn;
30525 enum machine_mode mode0, mode1, mode2, mode3, mode4;
30526 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
30528 /* For CPU builtins that can be folded, fold first and expand the fold. */
30531 case IX86_BUILTIN_CPU_INIT:
30533 /* Make it call __cpu_indicator_init in libgcc. */
30534 tree call_expr, fndecl, type;
30535 type = build_function_type_list (integer_type_node, NULL_TREE);
30536 fndecl = build_fn_decl ("__cpu_indicator_init", type);
30537 call_expr = build_call_expr (fndecl, 0);
30538 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
30540 case IX86_BUILTIN_CPU_IS:
30541 case IX86_BUILTIN_CPU_SUPPORTS:
30543 tree arg0 = CALL_EXPR_ARG (exp, 0);
30544 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
30545 gcc_assert (fold_expr != NULL_TREE);
30546 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
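/* User-level sketch of what gets folded here (use_avx_path is a
   placeholder, not a real function):

     if (__builtin_cpu_supports ("avx"))
       use_avx_path ();

   fold_builtin_cpu turns the call into a bit test against the
   __cpu_model data that __cpu_indicator_init fills in.  */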
30550 /* Determine whether the builtin function is available under the current ISA.
30551 Originally the builtin was not created if it wasn't applicable to the
30552 current ISA based on the command line switches. With function specific
30553 options, we need to check in the context of the function making the call
30554 whether it is supported. */
30555 if (ix86_builtins_isa[fcode].isa
30556 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
30558 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
30559 NULL, (enum fpmath_unit) 0, false);
30562 error ("%qE needs unknown isa option", fndecl);
30565 gcc_assert (opts != NULL);
30566 error ("%qE needs isa option %s", fndecl, opts);
30574 case IX86_BUILTIN_MASKMOVQ:
30575 case IX86_BUILTIN_MASKMOVDQU:
30576 icode = (fcode == IX86_BUILTIN_MASKMOVQ
30577 ? CODE_FOR_mmx_maskmovq
30578 : CODE_FOR_sse2_maskmovdqu);
30579 /* Note the arg order is different from the operand order. */
30580 arg1 = CALL_EXPR_ARG (exp, 0);
30581 arg2 = CALL_EXPR_ARG (exp, 1);
30582 arg0 = CALL_EXPR_ARG (exp, 2);
30583 op0 = expand_normal (arg0);
30584 op1 = expand_normal (arg1);
30585 op2 = expand_normal (arg2);
30586 mode0 = insn_data[icode].operand[0].mode;
30587 mode1 = insn_data[icode].operand[1].mode;
30588 mode2 = insn_data[icode].operand[2].mode;
30590 if (GET_MODE (op0) != Pmode)
30591 op0 = convert_to_mode (Pmode, op0, 1);
30592 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
30594 if (!insn_data[icode].operand[0].predicate (op0, mode0))
30595 op0 = copy_to_mode_reg (mode0, op0);
30596 if (!insn_data[icode].operand[1].predicate (op1, mode1))
30597 op1 = copy_to_mode_reg (mode1, op1);
30598 if (!insn_data[icode].operand[2].predicate (op2, mode2))
30599 op2 = copy_to_mode_reg (mode2, op2);
30600 pat = GEN_FCN (icode) (op0, op1, op2);
30606 case IX86_BUILTIN_LDMXCSR:
30607 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
30608 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
30609 emit_move_insn (target, op0);
30610 emit_insn (gen_sse_ldmxcsr (target));
30613 case IX86_BUILTIN_STMXCSR:
30614 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
30615 emit_insn (gen_sse_stmxcsr (target));
30616 return copy_to_mode_reg (SImode, target);
30618 case IX86_BUILTIN_CLFLUSH:
30619 arg0 = CALL_EXPR_ARG (exp, 0);
30620 op0 = expand_normal (arg0);
30621 icode = CODE_FOR_sse2_clflush;
30622 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
30624 if (GET_MODE (op0) != Pmode)
30625 op0 = convert_to_mode (Pmode, op0, 1);
30626 op0 = force_reg (Pmode, op0);
30629 emit_insn (gen_sse2_clflush (op0));
30632 case IX86_BUILTIN_MONITOR:
30633 arg0 = CALL_EXPR_ARG (exp, 0);
30634 arg1 = CALL_EXPR_ARG (exp, 1);
30635 arg2 = CALL_EXPR_ARG (exp, 2);
30636 op0 = expand_normal (arg0);
30637 op1 = expand_normal (arg1);
30638 op2 = expand_normal (arg2);
30641 if (GET_MODE (op0) != Pmode)
30642 op0 = convert_to_mode (Pmode, op0, 1);
30643 op0 = force_reg (Pmode, op0);
30646 op1 = copy_to_mode_reg (SImode, op1);
30648 op2 = copy_to_mode_reg (SImode, op2);
30649 emit_insn (ix86_gen_monitor (op0, op1, op2));
30652 case IX86_BUILTIN_MWAIT:
30653 arg0 = CALL_EXPR_ARG (exp, 0);
30654 arg1 = CALL_EXPR_ARG (exp, 1);
30655 op0 = expand_normal (arg0);
30656 op1 = expand_normal (arg1);
30658 op0 = copy_to_mode_reg (SImode, op0);
30660 op1 = copy_to_mode_reg (SImode, op1);
30661 emit_insn (gen_sse3_mwait (op0, op1));
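/* Usage sketch (the intrinsic wrappers live in GCC's pmmintrin.h;
   addr is any monitored address):

     _mm_monitor (addr, 0, 0);    expands via IX86_BUILTIN_MONITOR
     _mm_mwait (0, 0);            expands via IX86_BUILTIN_MWAIT  */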
30664 case IX86_BUILTIN_VEC_INIT_V2SI:
30665 case IX86_BUILTIN_VEC_INIT_V4HI:
30666 case IX86_BUILTIN_VEC_INIT_V8QI:
30667 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
30669 case IX86_BUILTIN_VEC_EXT_V2DF:
30670 case IX86_BUILTIN_VEC_EXT_V2DI:
30671 case IX86_BUILTIN_VEC_EXT_V4SF:
30672 case IX86_BUILTIN_VEC_EXT_V4SI:
30673 case IX86_BUILTIN_VEC_EXT_V8HI:
30674 case IX86_BUILTIN_VEC_EXT_V2SI:
30675 case IX86_BUILTIN_VEC_EXT_V4HI:
30676 case IX86_BUILTIN_VEC_EXT_V16QI:
30677 return ix86_expand_vec_ext_builtin (exp, target);
30679 case IX86_BUILTIN_VEC_SET_V2DI:
30680 case IX86_BUILTIN_VEC_SET_V4SF:
30681 case IX86_BUILTIN_VEC_SET_V4SI:
30682 case IX86_BUILTIN_VEC_SET_V8HI:
30683 case IX86_BUILTIN_VEC_SET_V4HI:
30684 case IX86_BUILTIN_VEC_SET_V16QI:
30685 return ix86_expand_vec_set_builtin (exp);
30687 case IX86_BUILTIN_INFQ:
30688 case IX86_BUILTIN_HUGE_VALQ:
30690 REAL_VALUE_TYPE inf;
30694 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
30696 tmp = validize_mem (force_const_mem (mode, tmp));
30699 target = gen_reg_rtx (mode);
30701 emit_move_insn (target, tmp);
30705 case IX86_BUILTIN_RDPMC:
30706 case IX86_BUILTIN_RDTSC:
30707 case IX86_BUILTIN_RDTSCP:
30709 op0 = gen_reg_rtx (DImode);
30710 op1 = gen_reg_rtx (DImode);
30712 if (fcode == IX86_BUILTIN_RDPMC)
30714 arg0 = CALL_EXPR_ARG (exp, 0);
30715 op2 = expand_normal (arg0);
30716 if (!register_operand (op2, SImode))
30717 op2 = copy_to_mode_reg (SImode, op2);
30719 insn = (TARGET_64BIT
30720 ? gen_rdpmc_rex64 (op0, op1, op2)
30721 : gen_rdpmc (op0, op2));
30724 else if (fcode == IX86_BUILTIN_RDTSC)
30726 insn = (TARGET_64BIT
30727 ? gen_rdtsc_rex64 (op0, op1)
30728 : gen_rdtsc (op0));
30733 op2 = gen_reg_rtx (SImode);
30735 insn = (TARGET_64BIT
30736 ? gen_rdtscp_rex64 (op0, op1, op2)
30737 : gen_rdtscp (op0, op2));
30740 arg0 = CALL_EXPR_ARG (exp, 0);
30741 op4 = expand_normal (arg0);
30742 if (!address_operand (op4, VOIDmode))
30744 op4 = convert_memory_address (Pmode, op4);
30745 op4 = copy_addr_to_reg (op4);
30747 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
30751 target = gen_reg_rtx (mode);
30755 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
30756 op1, 1, OPTAB_DIRECT);
30757 op0 = expand_simple_binop (DImode, IOR, op0, op1,
30758 op0, 1, OPTAB_DIRECT);
30761 emit_move_insn (target, op0);
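/* The shift/IOR pair above reassembles the 64-bit result from the two
   zero-extended halves returned in op0 (EAX) and op1 (EDX), i.e.
   target = op0 | (op1 << 32); for example EDX = 0x12 and
   EAX = 0x89abcdef yield 0x1289abcdef.  */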
30764 case IX86_BUILTIN_FXSAVE:
30765 case IX86_BUILTIN_FXRSTOR:
30766 case IX86_BUILTIN_FXSAVE64:
30767 case IX86_BUILTIN_FXRSTOR64:
30770 case IX86_BUILTIN_FXSAVE:
30771 icode = CODE_FOR_fxsave;
30773 case IX86_BUILTIN_FXRSTOR:
30774 icode = CODE_FOR_fxrstor;
30776 case IX86_BUILTIN_FXSAVE64:
30777 icode = CODE_FOR_fxsave64;
30779 case IX86_BUILTIN_FXRSTOR64:
30780 icode = CODE_FOR_fxrstor64;
30783 gcc_unreachable ();
30786 arg0 = CALL_EXPR_ARG (exp, 0);
30787 op0 = expand_normal (arg0);
30789 if (!address_operand (op0, VOIDmode))
30791 op0 = convert_memory_address (Pmode, op0);
30792 op0 = copy_addr_to_reg (op0);
30794 op0 = gen_rtx_MEM (BLKmode, op0);
30796 pat = GEN_FCN (icode) (op0);
30801 case IX86_BUILTIN_XSAVE:
30802 case IX86_BUILTIN_XRSTOR:
30803 case IX86_BUILTIN_XSAVE64:
30804 case IX86_BUILTIN_XRSTOR64:
30805 case IX86_BUILTIN_XSAVEOPT:
30806 case IX86_BUILTIN_XSAVEOPT64:
30807 arg0 = CALL_EXPR_ARG (exp, 0);
30808 arg1 = CALL_EXPR_ARG (exp, 1);
30809 op0 = expand_normal (arg0);
30810 op1 = expand_normal (arg1);
30812 if (!address_operand (op0, VOIDmode))
30814 op0 = convert_memory_address (Pmode, op0);
30815 op0 = copy_addr_to_reg (op0);
30817 op0 = gen_rtx_MEM (BLKmode, op0);
30819 op1 = force_reg (DImode, op1);
30823 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
30824 NULL, 1, OPTAB_DIRECT);
30827 case IX86_BUILTIN_XSAVE:
30828 icode = CODE_FOR_xsave_rex64;
30830 case IX86_BUILTIN_XRSTOR:
30831 icode = CODE_FOR_xrstor_rex64;
30833 case IX86_BUILTIN_XSAVE64:
30834 icode = CODE_FOR_xsave64;
30836 case IX86_BUILTIN_XRSTOR64:
30837 icode = CODE_FOR_xrstor64;
30839 case IX86_BUILTIN_XSAVEOPT:
30840 icode = CODE_FOR_xsaveopt_rex64;
30842 case IX86_BUILTIN_XSAVEOPT64:
30843 icode = CODE_FOR_xsaveopt64;
30846 gcc_unreachable ();
30849 op2 = gen_lowpart (SImode, op2);
30850 op1 = gen_lowpart (SImode, op1);
30851 pat = GEN_FCN (icode) (op0, op1, op2);
30857 case IX86_BUILTIN_XSAVE:
30858 icode = CODE_FOR_xsave;
30860 case IX86_BUILTIN_XRSTOR:
30861 icode = CODE_FOR_xrstor;
30863 case IX86_BUILTIN_XSAVEOPT:
30864 icode = CODE_FOR_xsaveopt;
30867 gcc_unreachable ();
30869 pat = GEN_FCN (icode) (op0, op1);
30876 case IX86_BUILTIN_LLWPCB:
30877 arg0 = CALL_EXPR_ARG (exp, 0);
30878 op0 = expand_normal (arg0);
30879 icode = CODE_FOR_lwp_llwpcb;
30880 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
30882 if (GET_MODE (op0) != Pmode)
30883 op0 = convert_to_mode (Pmode, op0, 1);
30884 op0 = force_reg (Pmode, op0);
30886 emit_insn (gen_lwp_llwpcb (op0));
30889 case IX86_BUILTIN_SLWPCB:
30890 icode = CODE_FOR_lwp_slwpcb;
30892 || !insn_data[icode].operand[0].predicate (target, Pmode))
30893 target = gen_reg_rtx (Pmode);
30894 emit_insn (gen_lwp_slwpcb (target));
30897 case IX86_BUILTIN_BEXTRI32:
30898 case IX86_BUILTIN_BEXTRI64:
30899 arg0 = CALL_EXPR_ARG (exp, 0);
30900 arg1 = CALL_EXPR_ARG (exp, 1);
30901 op0 = expand_normal (arg0);
30902 op1 = expand_normal (arg1);
30903 icode = (fcode == IX86_BUILTIN_BEXTRI32
30904 ? CODE_FOR_tbm_bextri_si
30905 : CODE_FOR_tbm_bextri_di);
30906 if (!CONST_INT_P (op1))
30908 error ("last argument must be an immediate");
30913 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
30914 unsigned char lsb_index = INTVAL (op1) & 0xFF;
30915 op1 = GEN_INT (length);
30916 op2 = GEN_INT (lsb_index);
30917 pat = GEN_FCN (icode) (target, op0, op1, op2);
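/* Worked example of the immediate split above: for
   __builtin_ia32_bextri_u32 (x, 0x0804) the control word decodes as
   length = (0x0804 >> 8) & 0xff = 8 and lsb_index = 0x0804 & 0xff = 4,
   i.e. extract the 8 bits of X starting at bit 4.  */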
30923 case IX86_BUILTIN_RDRAND16_STEP:
30924 icode = CODE_FOR_rdrandhi_1;
30928 case IX86_BUILTIN_RDRAND32_STEP:
30929 icode = CODE_FOR_rdrandsi_1;
30933 case IX86_BUILTIN_RDRAND64_STEP:
30934 icode = CODE_FOR_rdranddi_1;
30938 op0 = gen_reg_rtx (mode0);
30939 emit_insn (GEN_FCN (icode) (op0));
30941 arg0 = CALL_EXPR_ARG (exp, 0);
30942 op1 = expand_normal (arg0);
30943 if (!address_operand (op1, VOIDmode))
30945 op1 = convert_memory_address (Pmode, op1);
30946 op1 = copy_addr_to_reg (op1);
30948 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
30950 op1 = gen_reg_rtx (SImode);
30951 emit_move_insn (op1, CONST1_RTX (SImode));
30953 /* Emit SImode conditional move. */
30954 if (mode0 == HImode)
30956 op2 = gen_reg_rtx (SImode);
30957 emit_insn (gen_zero_extendhisi2 (op2, op0));
30959 else if (mode0 == SImode)
30962 op2 = gen_rtx_SUBREG (SImode, op0, 0);
30965 target = gen_reg_rtx (SImode);
30967 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
30969 emit_insn (gen_rtx_SET (VOIDmode, target,
30970 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
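/* The conditional move above relies on rdrand setting CF on success
   and clearing both CF and the destination on failure: carry clear
   (GEU on CCCmode) selects the zeroed hardware result, carry set
   selects the constant 1, so the builtin returns nonzero exactly when
   a random number was delivered.  */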
30973 case IX86_BUILTIN_RDSEED16_STEP:
30974 icode = CODE_FOR_rdseedhi_1;
30978 case IX86_BUILTIN_RDSEED32_STEP:
30979 icode = CODE_FOR_rdseedsi_1;
30983 case IX86_BUILTIN_RDSEED64_STEP:
30984 icode = CODE_FOR_rdseeddi_1;
30988 op0 = gen_reg_rtx (mode0);
30989 emit_insn (GEN_FCN (icode) (op0));
30991 arg0 = CALL_EXPR_ARG (exp, 0);
30992 op1 = expand_normal (arg0);
30993 if (!address_operand (op1, VOIDmode))
30995 op1 = convert_memory_address (Pmode, op1);
30996 op1 = copy_addr_to_reg (op1);
30998 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
31000 op2 = gen_reg_rtx (QImode);
31002 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
31004 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
31007 target = gen_reg_rtx (SImode);
31009 emit_insn (gen_zero_extendqisi2 (target, op2));
31012 case IX86_BUILTIN_ADDCARRYX32:
31013 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
31017 case IX86_BUILTIN_ADDCARRYX64:
31018 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
31022 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
31023 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
31024 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
31025 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
31027 op0 = gen_reg_rtx (QImode);
31029 /* Generate CF from input operand. */
31030 op1 = expand_normal (arg0);
31031 if (GET_MODE (op1) != QImode)
31032 op1 = convert_to_mode (QImode, op1, 1);
31033 op1 = copy_to_mode_reg (QImode, op1);
31034 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
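/* Adding -1 above regenerates CF from c_in: 0 + 0xff produces no
   carry, while any nonzero c_in (e.g. 1 + 0xff = 0x100) overflows
   QImode and sets CF for the carry-using add emitted below.  */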
31036 /* Generate an ADCX instruction to compute X+Y+CF. */
31037 op2 = expand_normal (arg1);
31038 op3 = expand_normal (arg2);
31041 op2 = copy_to_mode_reg (mode0, op2);
31043 op3 = copy_to_mode_reg (mode0, op3);
31045 op0 = gen_reg_rtx (mode0);
31047 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
31048 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
31049 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
31051 /* Store the result. */
31052 op4 = expand_normal (arg3);
31053 if (!address_operand (op4, VOIDmode))
31055 op4 = convert_memory_address (Pmode, op4);
31056 op4 = copy_addr_to_reg (op4);
31058 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
31060 /* Return current CF value. */
31062 target = gen_reg_rtx (QImode);
31064 PUT_MODE (pat, QImode);
31065 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
31068 case IX86_BUILTIN_GATHERSIV2DF:
31069 icode = CODE_FOR_avx2_gathersiv2df;
31071 case IX86_BUILTIN_GATHERSIV4DF:
31072 icode = CODE_FOR_avx2_gathersiv4df;
31074 case IX86_BUILTIN_GATHERDIV2DF:
31075 icode = CODE_FOR_avx2_gatherdiv2df;
31077 case IX86_BUILTIN_GATHERDIV4DF:
31078 icode = CODE_FOR_avx2_gatherdiv4df;
31080 case IX86_BUILTIN_GATHERSIV4SF:
31081 icode = CODE_FOR_avx2_gathersiv4sf;
31083 case IX86_BUILTIN_GATHERSIV8SF:
31084 icode = CODE_FOR_avx2_gathersiv8sf;
31086 case IX86_BUILTIN_GATHERDIV4SF:
31087 icode = CODE_FOR_avx2_gatherdiv4sf;
31089 case IX86_BUILTIN_GATHERDIV8SF:
31090 icode = CODE_FOR_avx2_gatherdiv8sf;
31092 case IX86_BUILTIN_GATHERSIV2DI:
31093 icode = CODE_FOR_avx2_gathersiv2di;
31095 case IX86_BUILTIN_GATHERSIV4DI:
31096 icode = CODE_FOR_avx2_gathersiv4di;
31098 case IX86_BUILTIN_GATHERDIV2DI:
31099 icode = CODE_FOR_avx2_gatherdiv2di;
31101 case IX86_BUILTIN_GATHERDIV4DI:
31102 icode = CODE_FOR_avx2_gatherdiv4di;
31104 case IX86_BUILTIN_GATHERSIV4SI:
31105 icode = CODE_FOR_avx2_gathersiv4si;
31107 case IX86_BUILTIN_GATHERSIV8SI:
31108 icode = CODE_FOR_avx2_gathersiv8si;
31110 case IX86_BUILTIN_GATHERDIV4SI:
31111 icode = CODE_FOR_avx2_gatherdiv4si;
31113 case IX86_BUILTIN_GATHERDIV8SI:
31114 icode = CODE_FOR_avx2_gatherdiv8si;
31116 case IX86_BUILTIN_GATHERALTSIV4DF:
31117 icode = CODE_FOR_avx2_gathersiv4df;
31119 case IX86_BUILTIN_GATHERALTDIV8SF:
31120 icode = CODE_FOR_avx2_gatherdiv8sf;
31122 case IX86_BUILTIN_GATHERALTSIV4DI:
31123 icode = CODE_FOR_avx2_gathersiv4di;
31125 case IX86_BUILTIN_GATHERALTDIV8SI:
31126 icode = CODE_FOR_avx2_gatherdiv8si;
31130 arg0 = CALL_EXPR_ARG (exp, 0);
31131 arg1 = CALL_EXPR_ARG (exp, 1);
31132 arg2 = CALL_EXPR_ARG (exp, 2);
31133 arg3 = CALL_EXPR_ARG (exp, 3);
31134 arg4 = CALL_EXPR_ARG (exp, 4);
31135 op0 = expand_normal (arg0);
31136 op1 = expand_normal (arg1);
31137 op2 = expand_normal (arg2);
31138 op3 = expand_normal (arg3);
31139 op4 = expand_normal (arg4);
31140 /* Note the arg order is different from the operand order. */
31141 mode0 = insn_data[icode].operand[1].mode;
31142 mode2 = insn_data[icode].operand[3].mode;
31143 mode3 = insn_data[icode].operand[4].mode;
31144 mode4 = insn_data[icode].operand[5].mode;
31146 if (target == NULL_RTX
31147 || GET_MODE (target) != insn_data[icode].operand[0].mode)
31148 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
31150 subtarget = target;
31152 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
31153 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
31155 rtx half = gen_reg_rtx (V4SImode);
31156 if (!nonimmediate_operand (op2, V8SImode))
31157 op2 = copy_to_mode_reg (V8SImode, op2);
31158 emit_insn (gen_vec_extract_lo_v8si (half, op2));
31161 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
31162 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
31164 rtx (*gen) (rtx, rtx);
31165 rtx half = gen_reg_rtx (mode0);
31166 if (mode0 == V4SFmode)
31167 gen = gen_vec_extract_lo_v8sf;
31169 gen = gen_vec_extract_lo_v8si;
31170 if (!nonimmediate_operand (op0, GET_MODE (op0)))
31171 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
31172 emit_insn (gen (half, op0));
31174 if (!nonimmediate_operand (op3, GET_MODE (op3)))
31175 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
31176 emit_insn (gen (half, op3));
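/* Only half of the wider vector operand participates in the GATHERALT
   variants (e.g. a V4DF gather indexed by V8SI needs just four
   indices), hence the low-half extractions above.  */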
31180 /* Force memory operand only with base register here. But we
31181 don't want to do it on memory operands for other builtin functions. */
31183 if (GET_MODE (op1) != Pmode)
31184 op1 = convert_to_mode (Pmode, op1, 1);
31185 op1 = force_reg (Pmode, op1);
31187 if (!insn_data[icode].operand[1].predicate (op0, mode0))
31188 op0 = copy_to_mode_reg (mode0, op0);
31189 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
31190 op1 = copy_to_mode_reg (Pmode, op1);
31191 if (!insn_data[icode].operand[3].predicate (op2, mode2))
31192 op2 = copy_to_mode_reg (mode2, op2);
31193 if (!insn_data[icode].operand[4].predicate (op3, mode3))
31194 op3 = copy_to_mode_reg (mode3, op3);
31195 if (!insn_data[icode].operand[5].predicate (op4, mode4))
31197 error ("last argument must be scale 1, 2, 4, 8");
31201 /* Optimize. If mask is known to have all high bits set,
31202 replace op0 with pc_rtx to signal that the instruction
31203 overwrites the whole destination and doesn't use its
31204 previous contents. */
31207 if (TREE_CODE (arg3) == VECTOR_CST)
31209 unsigned int negative = 0;
31210 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
31212 tree cst = VECTOR_CST_ELT (arg3, i);
31213 if (TREE_CODE (cst) == INTEGER_CST
31214 && tree_int_cst_sign_bit (cst))
31216 else if (TREE_CODE (cst) == REAL_CST
31217 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
31220 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
31223 else if (TREE_CODE (arg3) == SSA_NAME)
31225 /* Recognize also when mask is like:
31226 __v2df src = _mm_setzero_pd ();
31227 __v2df mask = _mm_cmpeq_pd (src, src);
31229 __v8sf src = _mm256_setzero_ps ();
31230 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
31231 as that is a cheaper way to load all ones into
31232 a register than having to load a constant from memory. */
31234 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
31235 if (is_gimple_call (def_stmt))
31237 tree fndecl = gimple_call_fndecl (def_stmt);
31239 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
31240 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
31242 case IX86_BUILTIN_CMPPD:
31243 case IX86_BUILTIN_CMPPS:
31244 case IX86_BUILTIN_CMPPD256:
31245 case IX86_BUILTIN_CMPPS256:
31246 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
31249 case IX86_BUILTIN_CMPEQPD:
31250 case IX86_BUILTIN_CMPEQPS:
31251 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
31252 && initializer_zerop (gimple_call_arg (def_stmt,
31263 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
31268 if (fcode == IX86_BUILTIN_GATHERDIV8SF
31269 || fcode == IX86_BUILTIN_GATHERDIV8SI)
31271 enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
31272 ? V4SFmode : V4SImode;
31273 if (target == NULL_RTX)
31274 target = gen_reg_rtx (tmode);
31275 if (tmode == V4SFmode)
31276 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
31278 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
31281 target = subtarget;
31285 case IX86_BUILTIN_XABORT:
31286 icode = CODE_FOR_xabort;
31287 arg0 = CALL_EXPR_ARG (exp, 0);
31288 op0 = expand_normal (arg0);
31289 mode0 = insn_data[icode].operand[0].mode;
31290 if (!insn_data[icode].operand[0].predicate (op0, mode0))
31292 error ("the xabort's argument must be an 8-bit immediate");
31295 emit_insn (gen_xabort (op0));
31302 for (i = 0, d = bdesc_special_args;
31303 i < ARRAY_SIZE (bdesc_special_args);
31305 if (d->code == fcode)
31306 return ix86_expand_special_args_builtin (d, exp, target);
31308 for (i = 0, d = bdesc_args;
31309 i < ARRAY_SIZE (bdesc_args);
31311 if (d->code == fcode)
31314 case IX86_BUILTIN_FABSQ:
31315 case IX86_BUILTIN_COPYSIGNQ:
31317 /* Emit a normal call if SSE isn't available. */
31318 return expand_call (exp, target, ignore);
31320 return ix86_expand_args_builtin (d, exp, target);
31323 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
31324 if (d->code == fcode)
31325 return ix86_expand_sse_comi (d, exp, target);
31327 for (i = 0, d = bdesc_pcmpestr;
31328 i < ARRAY_SIZE (bdesc_pcmpestr);
31330 if (d->code == fcode)
31331 return ix86_expand_sse_pcmpestr (d, exp, target);
31333 for (i = 0, d = bdesc_pcmpistr;
31334 i < ARRAY_SIZE (bdesc_pcmpistr);
31336 if (d->code == fcode)
31337 return ix86_expand_sse_pcmpistr (d, exp, target);
31339 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31340 if (d->code == fcode)
31341 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
31342 (enum ix86_builtin_func_type)
31343 d->flag, d->comparison);
31345 gcc_unreachable ();
31348 /* Returns a function decl for a vectorized version of the builtin function
31349 FNDECL, with result vector type TYPE_OUT and input vector type TYPE_IN,
31350 or NULL_TREE if it is not available. */
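/* Sketch of the effect: given a math mode in which sqrt calls are
   vectorizable (e.g. with AVX enabled), a loop over doubles such as

     for (i = 0; i < n; i++)
       b[i] = sqrt (a[i]);

   makes the vectorizer query this hook for BUILT_IN_SQRT; with V4DF
   in and out it receives IX86_BUILTIN_SQRTPD256 (vsqrtpd).  */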
31353 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
31356 enum machine_mode in_mode, out_mode;
31358 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
31360 if (TREE_CODE (type_out) != VECTOR_TYPE
31361 || TREE_CODE (type_in) != VECTOR_TYPE
31362 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
31365 out_mode = TYPE_MODE (TREE_TYPE (type_out));
31366 out_n = TYPE_VECTOR_SUBPARTS (type_out);
31367 in_mode = TYPE_MODE (TREE_TYPE (type_in));
31368 in_n = TYPE_VECTOR_SUBPARTS (type_in);
31372 case BUILT_IN_SQRT:
31373 if (out_mode == DFmode && in_mode == DFmode)
31375 if (out_n == 2 && in_n == 2)
31376 return ix86_builtins[IX86_BUILTIN_SQRTPD];
31377 else if (out_n == 4 && in_n == 4)
31378 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
31382 case BUILT_IN_SQRTF:
31383 if (out_mode == SFmode && in_mode == SFmode)
31385 if (out_n == 4 && in_n == 4)
31386 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
31387 else if (out_n == 8 && in_n == 8)
31388 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
31392 case BUILT_IN_IFLOOR:
31393 case BUILT_IN_LFLOOR:
31394 case BUILT_IN_LLFLOOR:
31395 /* The round insn does not trap on denormals. */
31396 if (flag_trapping_math || !TARGET_ROUND)
31399 if (out_mode == SImode && in_mode == DFmode)
31401 if (out_n == 4 && in_n == 2)
31402 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
31403 else if (out_n == 8 && in_n == 4)
31404 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
31408 case BUILT_IN_IFLOORF:
31409 case BUILT_IN_LFLOORF:
31410 case BUILT_IN_LLFLOORF:
31411 /* The round insn does not trap on denormals. */
31412 if (flag_trapping_math || !TARGET_ROUND)
31415 if (out_mode == SImode && in_mode == SFmode)
31417 if (out_n == 4 && in_n == 4)
31418 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
31419 else if (out_n == 8 && in_n == 8)
31420 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
31424 case BUILT_IN_ICEIL:
31425 case BUILT_IN_LCEIL:
31426 case BUILT_IN_LLCEIL:
31427 /* The round insn does not trap on denormals. */
31428 if (flag_trapping_math || !TARGET_ROUND)
31431 if (out_mode == SImode && in_mode == DFmode)
31433 if (out_n == 4 && in_n == 2)
31434 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
31435 else if (out_n == 8 && in_n == 4)
31436 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
31440 case BUILT_IN_ICEILF:
31441 case BUILT_IN_LCEILF:
31442 case BUILT_IN_LLCEILF:
31443 /* The round insn does not trap on denormals. */
31444 if (flag_trapping_math || !TARGET_ROUND)
31447 if (out_mode == SImode && in_mode == SFmode)
31449 if (out_n == 4 && in_n == 4)
31450 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
31451 else if (out_n == 8 && in_n == 8)
31452 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
31456 case BUILT_IN_IRINT:
31457 case BUILT_IN_LRINT:
31458 case BUILT_IN_LLRINT:
31459 if (out_mode == SImode && in_mode == DFmode)
31461 if (out_n == 4 && in_n == 2)
31462 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
31463 else if (out_n == 8 && in_n == 4)
31464 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
31468 case BUILT_IN_IRINTF:
31469 case BUILT_IN_LRINTF:
31470 case BUILT_IN_LLRINTF:
31471 if (out_mode == SImode && in_mode == SFmode)
31473 if (out_n == 4 && in_n == 4)
31474 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
31475 else if (out_n == 8 && in_n == 8)
31476 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
31480 case BUILT_IN_IROUND:
31481 case BUILT_IN_LROUND:
31482 case BUILT_IN_LLROUND:
31483 /* The round insn does not trap on denormals. */
31484 if (flag_trapping_math || !TARGET_ROUND)
31487 if (out_mode == SImode && in_mode == DFmode)
31489 if (out_n == 4 && in_n == 2)
31490 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
31491 else if (out_n == 8 && in_n == 4)
31492 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
31496 case BUILT_IN_IROUNDF:
31497 case BUILT_IN_LROUNDF:
31498 case BUILT_IN_LLROUNDF:
31499 /* The round insn does not trap on denormals. */
31500 if (flag_trapping_math || !TARGET_ROUND)
31503 if (out_mode == SImode && in_mode == SFmode)
31505 if (out_n == 4 && in_n == 4)
31506 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
31507 else if (out_n == 8 && in_n == 8)
31508 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
31512 case BUILT_IN_COPYSIGN:
31513 if (out_mode == DFmode && in_mode == DFmode)
31515 if (out_n == 2 && in_n == 2)
31516 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
31517 else if (out_n == 4 && in_n == 4)
31518 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
31522 case BUILT_IN_COPYSIGNF:
31523 if (out_mode == SFmode && in_mode == SFmode)
31525 if (out_n == 4 && in_n == 4)
31526 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
31527 else if (out_n == 8 && in_n == 8)
31528 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
31532 case BUILT_IN_FLOOR:
31533 /* The round insn does not trap on denormals. */
31534 if (flag_trapping_math || !TARGET_ROUND)
31537 if (out_mode == DFmode && in_mode == DFmode)
31539 if (out_n == 2 && in_n == 2)
31540 return ix86_builtins[IX86_BUILTIN_FLOORPD];
31541 else if (out_n == 4 && in_n == 4)
31542 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
31546 case BUILT_IN_FLOORF:
31547 /* The round insn does not trap on denormals. */
31548 if (flag_trapping_math || !TARGET_ROUND)
31551 if (out_mode == SFmode && in_mode == SFmode)
31553 if (out_n == 4 && in_n == 4)
31554 return ix86_builtins[IX86_BUILTIN_FLOORPS];
31555 else if (out_n == 8 && in_n == 8)
31556 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
31560 case BUILT_IN_CEIL:
31561 /* The round insn does not trap on denormals. */
31562 if (flag_trapping_math || !TARGET_ROUND)
31565 if (out_mode == DFmode && in_mode == DFmode)
31567 if (out_n == 2 && in_n == 2)
31568 return ix86_builtins[IX86_BUILTIN_CEILPD];
31569 else if (out_n == 4 && in_n == 4)
31570 return ix86_builtins[IX86_BUILTIN_CEILPD256];
31574 case BUILT_IN_CEILF:
31575 /* The round insn does not trap on denormals. */
31576 if (flag_trapping_math || !TARGET_ROUND)
31579 if (out_mode == SFmode && in_mode == SFmode)
31581 if (out_n == 4 && in_n == 4)
31582 return ix86_builtins[IX86_BUILTIN_CEILPS];
31583 else if (out_n == 8 && in_n == 8)
31584 return ix86_builtins[IX86_BUILTIN_CEILPS256];
31588 case BUILT_IN_TRUNC:
31589 /* The round insn does not trap on denormals. */
31590 if (flag_trapping_math || !TARGET_ROUND)
31593 if (out_mode == DFmode && in_mode == DFmode)
31595 if (out_n == 2 && in_n == 2)
31596 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
31597 else if (out_n == 4 && in_n == 4)
31598 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
31602 case BUILT_IN_TRUNCF:
31603 /* The round insn does not trap on denormals. */
31604 if (flag_trapping_math || !TARGET_ROUND)
31607 if (out_mode == SFmode && in_mode == SFmode)
31609 if (out_n == 4 && in_n == 4)
31610 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
31611 else if (out_n == 8 && in_n == 8)
31612 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
31616 case BUILT_IN_RINT:
31617 /* The round insn does not trap on denormals. */
31618 if (flag_trapping_math || !TARGET_ROUND)
31621 if (out_mode == DFmode && in_mode == DFmode)
31623 if (out_n == 2 && in_n == 2)
31624 return ix86_builtins[IX86_BUILTIN_RINTPD];
31625 else if (out_n == 4 && in_n == 4)
31626 return ix86_builtins[IX86_BUILTIN_RINTPD256];
31630 case BUILT_IN_RINTF:
31631 /* The round insn does not trap on denormals. */
31632 if (flag_trapping_math || !TARGET_ROUND)
31635 if (out_mode == SFmode && in_mode == SFmode)
31637 if (out_n == 4 && in_n == 4)
31638 return ix86_builtins[IX86_BUILTIN_RINTPS];
31639 else if (out_n == 8 && in_n == 8)
31640 return ix86_builtins[IX86_BUILTIN_RINTPS256];
31644 case BUILT_IN_ROUND:
31645 /* The round insn does not trap on denormals. */
31646 if (flag_trapping_math || !TARGET_ROUND)
31649 if (out_mode == DFmode && in_mode == DFmode)
31651 if (out_n == 2 && in_n == 2)
31652 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
31653 else if (out_n == 4 && in_n == 4)
31654 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
31658 case BUILT_IN_ROUNDF:
31659 /* The round insn does not trap on denormals. */
31660 if (flag_trapping_math || !TARGET_ROUND)
31663 if (out_mode == SFmode && in_mode == SFmode)
31665 if (out_n == 4 && in_n == 4)
31666 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
31667 else if (out_n == 8 && in_n == 8)
31668 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
31673 if (out_mode == DFmode && in_mode == DFmode)
31675 if (out_n == 2 && in_n == 2)
31676 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
31677 if (out_n == 4 && in_n == 4)
31678 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
31682 case BUILT_IN_FMAF:
31683 if (out_mode == SFmode && in_mode == SFmode)
31685 if (out_n == 4 && in_n == 4)
31686 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
31687 if (out_n == 8 && in_n == 8)
31688 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
31696 /* Dispatch to a handler for a vectorization library. */
31697 if (ix86_veclib_handler)
31698 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
31704 /* Handler for an SVML-style interface to
31705 a library with vectorized intrinsics. */
31708 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
31711 tree fntype, new_fndecl, args;
31714 enum machine_mode el_mode, in_mode;
31717 /* The SVML library is suitable for unsafe math only. */
31718 if (!flag_unsafe_math_optimizations)
31721 el_mode = TYPE_MODE (TREE_TYPE (type_out));
31722 n = TYPE_VECTOR_SUBPARTS (type_out);
31723 in_mode = TYPE_MODE (TREE_TYPE (type_in));
31724 in_n = TYPE_VECTOR_SUBPARTS (type_in);
31725 if (el_mode != in_mode
31733 case BUILT_IN_LOG10:
31735 case BUILT_IN_TANH:
31737 case BUILT_IN_ATAN:
31738 case BUILT_IN_ATAN2:
31739 case BUILT_IN_ATANH:
31740 case BUILT_IN_CBRT:
31741 case BUILT_IN_SINH:
31743 case BUILT_IN_ASINH:
31744 case BUILT_IN_ASIN:
31745 case BUILT_IN_COSH:
31747 case BUILT_IN_ACOSH:
31748 case BUILT_IN_ACOS:
31749 if (el_mode != DFmode || n != 2)
31753 case BUILT_IN_EXPF:
31754 case BUILT_IN_LOGF:
31755 case BUILT_IN_LOG10F:
31756 case BUILT_IN_POWF:
31757 case BUILT_IN_TANHF:
31758 case BUILT_IN_TANF:
31759 case BUILT_IN_ATANF:
31760 case BUILT_IN_ATAN2F:
31761 case BUILT_IN_ATANHF:
31762 case BUILT_IN_CBRTF:
31763 case BUILT_IN_SINHF:
31764 case BUILT_IN_SINF:
31765 case BUILT_IN_ASINHF:
31766 case BUILT_IN_ASINF:
31767 case BUILT_IN_COSHF:
31768 case BUILT_IN_COSF:
31769 case BUILT_IN_ACOSHF:
31770 case BUILT_IN_ACOSF:
31771 if (el_mode != SFmode || n != 4)
31779 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
31781 if (fn == BUILT_IN_LOGF)
31782 strcpy (name, "vmlsLn4");
31783 else if (fn == BUILT_IN_LOG)
31784 strcpy (name, "vmldLn2");
31787 sprintf (name, "vmls%s", bname+10);
31788 name[strlen (name)-1] = '4';
31791 sprintf (name, "vmld%s2", bname+10);
31793 /* Convert to uppercase. */
31797 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
31799 args = TREE_CHAIN (args))
31803 fntype = build_function_type_list (type_out, type_in, NULL);
31805 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
31807 /* Build a function declaration for the vectorized function. */
31808 new_fndecl = build_decl (BUILTINS_LOCATION,
31809 FUNCTION_DECL, get_identifier (name), fntype);
31810 TREE_PUBLIC (new_fndecl) = 1;
31811 DECL_EXTERNAL (new_fndecl) = 1;
31812 DECL_IS_NOVOPS (new_fndecl) = 1;
31813 TREE_READONLY (new_fndecl) = 1;
31818 /* Handler for an ACML-style interface to
31819 a library with vectorized intrinsics. */
31822 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
31824 char name[20] = "__vr.._";
31825 tree fntype, new_fndecl, args;
31828 enum machine_mode el_mode, in_mode;
31831 /* The ACML library is 64-bit only, and is suitable for unsafe math
31832 only, as it does not correctly support parts of IEEE with the
31833 required precision, such as denormals. */
31835 || !flag_unsafe_math_optimizations)
31838 el_mode = TYPE_MODE (TREE_TYPE (type_out));
31839 n = TYPE_VECTOR_SUBPARTS (type_out);
31840 in_mode = TYPE_MODE (TREE_TYPE (type_in));
31841 in_n = TYPE_VECTOR_SUBPARTS (type_in);
31842 if (el_mode != in_mode
31852 case BUILT_IN_LOG2:
31853 case BUILT_IN_LOG10:
31856 if (el_mode != DFmode
31861 case BUILT_IN_SINF:
31862 case BUILT_IN_COSF:
31863 case BUILT_IN_EXPF:
31864 case BUILT_IN_POWF:
31865 case BUILT_IN_LOGF:
31866 case BUILT_IN_LOG2F:
31867 case BUILT_IN_LOG10F:
31870 if (el_mode != SFmode
31879 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
31880 sprintf (name + 7, "%s", bname+10);
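/* Worked example: for BUILT_IN_SIN, bname+10 is "sin" and the dots in
   "__vr.._" are filled in (elided above) so that the resulting ACML
   entry point is __vrd2_sin; BUILT_IN_SINF yields __vrs4_sinf.  */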
31883 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
31885 args = TREE_CHAIN (args))
31889 fntype = build_function_type_list (type_out, type_in, NULL);
31891 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
31893 /* Build a function declaration for the vectorized function. */
31894 new_fndecl = build_decl (BUILTINS_LOCATION,
31895 FUNCTION_DECL, get_identifier (name), fntype);
31896 TREE_PUBLIC (new_fndecl) = 1;
31897 DECL_EXTERNAL (new_fndecl) = 1;
31898 DECL_IS_NOVOPS (new_fndecl) = 1;
31899 TREE_READONLY (new_fndecl) = 1;
31904 /* Returns a decl of a function that implements a gather load with
31905 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
31906 Returns NULL_TREE if it is not available. */
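/* For example, a V2DF load gathered through SImode indices maps to
   IX86_BUILTIN_GATHERSIV2DF (vgatherdpd) in the switch below.  */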
31909 ix86_vectorize_builtin_gather (const_tree mem_vectype,
31910 const_tree index_type, int scale)
31913 enum ix86_builtins code;
31918 if ((TREE_CODE (index_type) != INTEGER_TYPE
31919 && !POINTER_TYPE_P (index_type))
31920 || (TYPE_MODE (index_type) != SImode
31921 && TYPE_MODE (index_type) != DImode))
31924 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
31927 /* v*gather* insn sign extends index to pointer mode. */
31928 if (TYPE_PRECISION (index_type) < POINTER_SIZE
31929 && TYPE_UNSIGNED (index_type))
31934 || (scale & (scale - 1)) != 0)
31937 si = TYPE_MODE (index_type) == SImode;
31938 switch (TYPE_MODE (mem_vectype))
31941 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
31944 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
31947 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
31950 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
31953 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
31956 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
31959 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
31962 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
31968 return ix86_builtins[code];
31971 /* Returns a code for a target-specific builtin that implements
31972 the reciprocal of the function, or NULL_TREE if not available. */
31975 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
31976 bool sqrt ATTRIBUTE_UNUSED)
31978 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
31979 && flag_finite_math_only && !flag_trapping_math
31980 && flag_unsafe_math_optimizations))
31984 /* Machine dependent builtins. */
31987 /* Vectorized version of sqrt to rsqrt conversion. */
31988 case IX86_BUILTIN_SQRTPS_NR:
31989 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
31991 case IX86_BUILTIN_SQRTPS_NR256:
31992 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
31998 /* Normal builtins. */
32001 /* Sqrt to rsqrt conversion. */
32002 case BUILT_IN_SQRTF:
32003 return ix86_builtins[IX86_BUILTIN_RSQRTF];
32010 /* Helper for avx_vpermilps256_operand et al. This is also used by
32011 the expansion functions to turn the parallel back into a mask.
32012 The return value is 0 for no match and the imm8+1 for a match. */
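/* Worked example for the V4SF (128-bit) case handled below: the
   parallel [1 0 3 2] packs as mask = 1 | 0<<2 | 3<<4 | 2<<6 = 0xb1,
   the vpermilps selector that swaps adjacent elements, so the
   function returns 0xb1 + 1.  */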
32015 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
32017 unsigned i, nelt = GET_MODE_NUNITS (mode);
32019 unsigned char ipar[8];
32021 if (XVECLEN (par, 0) != (int) nelt)
32024 /* Validate that all of the elements are constants, and not totally
32025 out of range. Copy the data into an integral array to make the
32026 subsequent checks easier. */
32027 for (i = 0; i < nelt; ++i)
32029 rtx er = XVECEXP (par, 0, i);
32030 unsigned HOST_WIDE_INT ei;
32032 if (!CONST_INT_P (er))
32043 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
32045 for (i = 0; i < 2; ++i)
32049 mask |= ipar[i] << i;
32051 for (i = 2; i < 4; ++i)
32055 mask |= (ipar[i] - 2) << i;
32060 /* In the 256-bit SFmode case, we have full freedom of movement
32061 within the low 128-bit lane, but the high 128-bit lane must
32062 mirror the exact same pattern. */
32063 for (i = 0; i < 4; ++i)
32064 if (ipar[i] + 4 != ipar[i + 4])
32071 /* In the 128-bit case, we have full freedom in the placement of
32072 the elements from the source operand. */
32073 for (i = 0; i < nelt; ++i)
32074 mask |= ipar[i] << (i * (nelt / 2));
32078 gcc_unreachable ();
32081 /* Make sure success has a non-zero value by adding one. */
32085 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
32086 the expansion functions to turn the parallel back into a mask.
32087 The return value is 0 for no match and the imm8+1 for a match. */
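/* Worked example for V4DF: the parallel [0 1 4 5] (low lane of the
   first operand followed by low lane of the second) has contiguous,
   lane-aligned halves, so the reconstructed selector is
   0 | 2<<4 = 0x20 and the function returns 0x21.  */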
32090 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
32092 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
32094 unsigned char ipar[8];
32096 if (XVECLEN (par, 0) != (int) nelt)
32099 /* Validate that all of the elements are constants, and not totally
32100 out of range. Copy the data into an integral array to make the
32101 subsequent checks easier. */
32102 for (i = 0; i < nelt; ++i)
32104 rtx er = XVECEXP (par, 0, i);
32105 unsigned HOST_WIDE_INT ei;
32107 if (!CONST_INT_P (er))
32110 if (ei >= 2 * nelt)
32115 /* Validate that each half of the permute consists of consecutive elements. */
32116 for (i = 0; i < nelt2 - 1; ++i)
32117 if (ipar[i] + 1 != ipar[i + 1])
32119 for (i = nelt2; i < nelt - 1; ++i)
32120 if (ipar[i] + 1 != ipar[i + 1])
32123 /* Reconstruct the mask. */
32124 for (i = 0; i < 2; ++i)
32126 unsigned e = ipar[i * nelt2];
32130 mask |= e << (i * 4);
32133 /* Make sure success has a non-zero value by adding one. */
32137 /* Store OPERAND to memory after reload is completed. This means
32138 that we can't easily use assign_stack_local. */
32140 ix86_force_to_memory (enum machine_mode mode, rtx operand)
32144 gcc_assert (reload_completed);
32145 if (ix86_using_red_zone ())
32147 result = gen_rtx_MEM (mode,
32148 gen_rtx_PLUS (Pmode,
32150 GEN_INT (-RED_ZONE_SIZE)));
32151 emit_move_insn (result, operand);
32153 else if (TARGET_64BIT)
32159 operand = gen_lowpart (DImode, operand);
32163 gen_rtx_SET (VOIDmode,
32164 gen_rtx_MEM (DImode,
32165 gen_rtx_PRE_DEC (DImode,
32166 stack_pointer_rtx)),
32170 gcc_unreachable ();
32172 result = gen_rtx_MEM (mode, stack_pointer_rtx);
32181 split_double_mode (mode, &operand, 1, operands, operands + 1);
32183 gen_rtx_SET (VOIDmode,
32184 gen_rtx_MEM (SImode,
32185 gen_rtx_PRE_DEC (Pmode,
32186 stack_pointer_rtx)),
32189 gen_rtx_SET (VOIDmode,
32190 gen_rtx_MEM (SImode,
32191 gen_rtx_PRE_DEC (Pmode,
32192 stack_pointer_rtx)),
32197 /* Store HImode values as SImode. */
32198 operand = gen_lowpart (SImode, operand);
32202 gen_rtx_SET (VOIDmode,
32203 gen_rtx_MEM (GET_MODE (operand),
32204 gen_rtx_PRE_DEC (SImode,
32205 stack_pointer_rtx)),
32209 gcc_unreachable ();
32211 result = gen_rtx_MEM (mode, stack_pointer_rtx);
32216 /* Free the operand from memory. */
32218 ix86_free_from_memory (enum machine_mode mode)
32220 if (!ix86_using_red_zone ())
32224 if (mode == DImode || TARGET_64BIT)
32228 /* Use LEA to deallocate stack space. In peephole2 it will be converted
32229 to a pop or add instruction if registers are available. */
32230 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
32231 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
32236 /* Return true if we use LRA instead of the reload pass. */
32243 /* Return a register priority for hard reg REGNO. */
32245 ix86_register_priority (int hard_regno)
32247 /* ebp and r13 as the base always want a displacement, and r12 as the
32248 base always wants an index. So discourage their usage in an address. */
32250 if (hard_regno == R12_REG || hard_regno == R13_REG)
32252 if (hard_regno == BP_REG)
32254 /* New x86-64 int registers result in bigger code size. Discourage them. */
32256 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
32258 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
32260 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
32262 /* Usage of the AX register results in smaller code. Prefer it. */
32263 if (hard_regno == 0)
32268 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
32270 Put float CONST_DOUBLE in the constant pool instead of fp regs.
32271 QImode must go into class Q_REGS.
32272 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
32273 movdf to do mem-to-mem moves through integer regs. */
32276 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
32278 enum machine_mode mode = GET_MODE (x);
32280 /* We're only allowed to return a subclass of CLASS. Many of the
32281 following checks fail for NO_REGS, so eliminate that early. */
32282 if (regclass == NO_REGS)
32285 /* All classes can load zeros. */
32286 if (x == CONST0_RTX (mode))
32289 /* Force constants into memory if we are loading a (nonzero) constant into
32290 an MMX or SSE register. This is because there are no MMX/SSE instructions
32291 to load from a constant. */
32293 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
32296 /* Prefer SSE regs only, if we can use them for math. */
32297 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
32298 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
32300 /* Floating-point constants need more complex checks. */
32301 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
32303 /* General regs can load everything. */
32304 if (reg_class_subset_p (regclass, GENERAL_REGS))
32307 /* Floats can load 0 and 1 plus some others. Note that we eliminated
32308 zero above. We only want to wind up preferring 80387 registers if
32309 we plan on doing computation with them. */
32311 && standard_80387_constant_p (x) > 0)
32313 /* Limit class to non-sse. */
32314 if (regclass == FLOAT_SSE_REGS)
32316 if (regclass == FP_TOP_SSE_REGS)
32318 if (regclass == FP_SECOND_SSE_REGS)
32319 return FP_SECOND_REG;
32320 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
32327 /* Generally when we see PLUS here, it's the function invariant
32328 (plus soft-fp const_int), which can only be computed into general
32330 if (GET_CODE (x) == PLUS)
32331 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
32333 /* QImode constants are easy to load, but non-constant QImode data
32334 must go into Q_REGS. */
32335 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
32337 if (reg_class_subset_p (regclass, Q_REGS))
32339 if (reg_class_subset_p (Q_REGS, regclass))
32347 /* Discourage putting floating-point values in SSE registers unless
32348 SSE math is being used, and likewise for the 387 registers. */
32350 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
32352 enum machine_mode mode = GET_MODE (x);
32354 /* Restrict the output reload class to the register bank that we are doing
32355 math on. If we would like not to return a subset of CLASS, reject this
32356 alternative: if reload cannot do this, it will still use its choice. */
32357 mode = GET_MODE (x);
32358 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
32359 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
32361 if (X87_FLOAT_MODE_P (mode))
32363 if (regclass == FP_TOP_SSE_REGS)
32365 else if (regclass == FP_SECOND_SSE_REGS)
32366 return FP_SECOND_REG;
32368 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
32375 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
32376 enum machine_mode mode, secondary_reload_info *sri)
32378 /* Double-word spills from general registers to non-offsettable memory
32379 references (zero-extended addresses) require special handling. */
32382 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
32383 && rclass == GENERAL_REGS
32384 && !offsettable_memref_p (x))
32387 ? CODE_FOR_reload_noff_load
32388 : CODE_FOR_reload_noff_store);
32389 /* Add the cost of moving address to a temporary. */
32390 sri->extra_cost = 1;
32395 /* QImode spills from non-QI registers require
32396 an intermediate register on 32-bit targets. */
32398 && !in_p && mode == QImode
32399 && (rclass == GENERAL_REGS
32400 || rclass == LEGACY_REGS
32401 || rclass == NON_Q_REGS
32404 || rclass == INDEX_REGS))
32413 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
32414 regno = true_regnum (x);
32416 /* Return Q_REGS if the operand is in memory. */
32421 /* This condition handles the corner case where an expression involving
32422 pointers gets vectorized. We're trying to use the address of a
32423 stack slot as a vector initializer.
32425 (set (reg:V2DI 74 [ vect_cst_.2 ])
32426 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
32428 Eventually frame gets turned into sp+offset like this:
32430 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
32431 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
32432 (const_int 392 [0x188]))))
32434 That later gets turned into:
32436 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
32437 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
32438 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
32440 We'll have the following reload recorded:
32442 Reload 0: reload_in (DI) =
32443 (plus:DI (reg/f:DI 7 sp)
32444 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
32445 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
32446 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
32447 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
32448 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
32449 reload_reg_rtx: (reg:V2DI 22 xmm1)
32451 This isn't going to work since SSE instructions can't handle scalar
32452 additions. Returning GENERAL_REGS forces the addition into an integer
32453 register, and reload can handle subsequent reloads without problems. */
32455 if (in_p && GET_CODE (x) == PLUS
32456 && SSE_CLASS_P (rclass)
32457 && SCALAR_INT_MODE_P (mode))
32458 return GENERAL_REGS;
32463 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
32466 ix86_class_likely_spilled_p (reg_class_t rclass)
32477 case SSE_FIRST_REG:
32479 case FP_SECOND_REG:
32489 /* If we are copying between general and FP registers, we need a memory
32490 location. The same is true for SSE and MMX registers.
32492 To optimize register_move_cost performance, allow inline variant.
32494 The macro can't work reliably when one of the CLASSES is a class containing
32495 registers from multiple units (SSE, MMX, integer). We avoid this by never
32496 combining those units in a single alternative in the machine description.
32497 Ensure that this constraint holds to avoid unexpected surprises.
32499 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
32500 enforce these sanity checks. */
32503 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
32504 enum machine_mode mode, int strict)
32506 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
32507 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
32508 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
32509 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
32510 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
32511 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
32513 gcc_assert (!strict || lra_in_progress);
32517 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
32520 /* ??? This is a lie. We do have moves between mmx/general, and between
32521 mmx/sse2. But by saying we need secondary memory we discourage the
32522 register allocator from using the mmx registers unless needed. */
32523 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
32526 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
32528 /* SSE1 doesn't have any direct moves from other classes. */
32532 /* If the target says that inter-unit moves are more expensive
32533 than moving through memory, then don't generate them. */
32534 if (!TARGET_INTER_UNIT_MOVES)
32537 /* Between SSE and general registers, we have moves no larger than word size. */
32538 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
32546 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
32547 enum machine_mode mode, int strict)
32549 return inline_secondary_memory_needed (class1, class2, mode, strict);
32552 /* Implement the TARGET_CLASS_MAX_NREGS hook.
32554 On the 80386, this is the size of MODE in words,
32555 except in the FP regs, where a single reg is always enough. */
32557 static unsigned char
32558 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
32560 if (MAYBE_INTEGER_CLASS_P (rclass))
32562 if (mode == XFmode)
32563 return (TARGET_64BIT ? 2 : 3);
32564 else if (mode == XCmode)
32565 return (TARGET_64BIT ? 4 : 6);
32567 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
32571 if (COMPLEX_MODE_P (mode))
32578 /* Return true if the registers in CLASS cannot represent the change from
32579 modes FROM to TO. */
32582 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
32583 enum reg_class regclass)
32588 /* x87 registers can't do subreg at all, as all values are reformatted
32589 to extended precision. */
32590 if (MAYBE_FLOAT_CLASS_P (regclass))
32593 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
32595 /* Vector registers do not support QI or HImode loads. If we don't
32596 disallow a change to these modes, reload will assume it's ok to
32597 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
32598 the vec_dupv4hi pattern. */
32599 if (GET_MODE_SIZE (from) < 4)
32602 /* Vector registers do not support subreg with nonzero offsets, which
32603 are otherwise valid for integer registers. Since we can't see
32604 whether we have a nonzero offset from here, prohibit all
32605 nonparadoxical subregs changing size. */
32606 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
32613 /* Return the cost of moving data of mode M between a
32614 register and memory. A value of 2 is the default; this cost is
32615 relative to those in `REGISTER_MOVE_COST'.
32617 This function is used extensively by register_move_cost, which is used to
32618 build tables at startup. Make it inline in this case.
32619 When IN is 2, return the maximum of the in and out move costs.
32621 If moving between registers and memory is more expensive than
32622 between two registers, you should define this macro to express the relative cost.
32625 Also model the increased moving costs of QImode registers in non-Q_REGS classes. */
32629 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
32633 if (FLOAT_CLASS_P (regclass))
32651 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
32652 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
32654 if (SSE_CLASS_P (regclass))
32657 switch (GET_MODE_SIZE (mode))
32672 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
32673 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
32675 if (MMX_CLASS_P (regclass))
32678 switch (GET_MODE_SIZE (mode))
32690 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
32691 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
32693 switch (GET_MODE_SIZE (mode))
32696 if (Q_CLASS_P (regclass) || TARGET_64BIT)
32699 return ix86_cost->int_store[0];
32700 if (TARGET_PARTIAL_REG_DEPENDENCY
32701 && optimize_function_for_speed_p (cfun))
32702 cost = ix86_cost->movzbl_load;
32704 cost = ix86_cost->int_load[0];
32706 return MAX (cost, ix86_cost->int_store[0]);
32712 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
32714 return ix86_cost->movzbl_load;
32716 return ix86_cost->int_store[0] + 4;
32721 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
32722 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
32724 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
32725 if (mode == TFmode)
32728 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
32730 cost = ix86_cost->int_load[2];
32732 cost = ix86_cost->int_store[2];
32733 return (cost * (((int) GET_MODE_SIZE (mode)
32734 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
32739 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
32742 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
32746 /* Return the cost of moving data from a register in class CLASS1 to
32747 one in class CLASS2.
32749 It is not required that the cost always equal 2 when FROM is the same as TO;
32750 on some machines it is expensive to move between registers if they are not
32751 general registers. */
32754 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
32755 reg_class_t class2_i)
32757 enum reg_class class1 = (enum reg_class) class1_i;
32758 enum reg_class class2 = (enum reg_class) class2_i;
/* If we require secondary memory, compute the cost of the store
   followed by a load.  To avoid bad register allocation choices, we
   need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
32764 if (inline_secondary_memory_needed (class1, class2, mode, 0))
32768 cost += inline_memory_move_cost (mode, class1, 2);
32769 cost += inline_memory_move_cost (mode, class2, 2);
/* When copying from a general-purpose register we may emit multiple
   stores followed by a single load, causing a memory size mismatch
   stall.  Count this as an arbitrarily high cost of 20.  */
32774 if (targetm.class_max_nregs (class1, mode)
32775 > targetm.class_max_nregs (class2, mode))
32778 /* In the case of FP/MMX moves, the registers actually overlap, and we
32779 have to switch modes in order to treat them differently. */
32780 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
32781 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
32787 /* Moves between SSE/MMX and integer unit are expensive. */
32788 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
32789 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
32791 /* ??? By keeping returned value relatively high, we limit the number
32792 of moves between integer and MMX/SSE registers for all targets.
Additionally, the high value prevents a problem in x86_modes_tieable_p(),
32794 where integer modes in MMX/SSE registers are not tieable
32795 because of missing QImode and HImode moves to, from or between
32796 MMX/SSE registers. */
32797 return MAX (8, ix86_cost->mmxsse_to_integer);
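/* E.g. copying DImode between an SSE register and a general register
   therefore costs at least 8, no matter how cheap the target's
   mmxsse_to_integer entry is.  */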
32799 if (MAYBE_FLOAT_CLASS_P (class1))
32800 return ix86_cost->fp_move;
32801 if (MAYBE_SSE_CLASS_P (class1))
32802 return ix86_cost->sse_move;
32803 if (MAYBE_MMX_CLASS_P (class1))
32804 return ix86_cost->mmx_move;
32808 /* Return TRUE if hard register REGNO can hold a value of machine-mode
32812 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
/* Flags, and only flags, can hold CCmode values.  */
32815 if (CC_REGNO_P (regno))
32816 return GET_MODE_CLASS (mode) == MODE_CC;
32817 if (GET_MODE_CLASS (mode) == MODE_CC
32818 || GET_MODE_CLASS (mode) == MODE_RANDOM
32819 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
32821 if (STACK_REGNO_P (regno))
32822 return VALID_FP_MODE_P (mode);
32823 if (SSE_REGNO_P (regno))
/* We implement the move patterns for all vector modes into and
   out of SSE registers, even when no operation instructions
   are available.  OImode moves are available only when AVX is
   enabled.  */
32829 return ((TARGET_AVX && mode == OImode)
32830 || VALID_AVX256_REG_MODE (mode)
32831 || VALID_SSE_REG_MODE (mode)
32832 || VALID_SSE2_REG_MODE (mode)
32833 || VALID_MMX_REG_MODE (mode)
32834 || VALID_MMX_REG_MODE_3DNOW (mode));
32836 if (MMX_REGNO_P (regno))
32838 /* We implement the move patterns for 3DNOW modes even in MMX mode,
32839 so if the register is available at all, then we can move data of
32840 the given mode into or out of it. */
32841 return (VALID_MMX_REG_MODE (mode)
32842 || VALID_MMX_REG_MODE_3DNOW (mode));
32845 if (mode == QImode)
/* Take care with QImode values - they can live in non-QI regs,
   but then they cause partial register stalls.  */
32849 if (TARGET_64BIT || QI_REGNO_P (regno))
32851 if (!TARGET_PARTIAL_REG_STALL)
32853 return !can_create_pseudo_p ();
32855 /* We handle both integer and floats in the general purpose registers. */
32856 else if (VALID_INT_MODE_P (mode))
32858 else if (VALID_FP_MODE_P (mode))
32860 else if (VALID_DFP_MODE_P (mode))
32862 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
32863 on to use that value in smaller contexts, this can easily force a
32864 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
32865 supporting DImode, allow it. */
32866 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
32872 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
32873 tieable integer mode. */
32876 ix86_tieable_integer_mode_p (enum machine_mode mode)
32885 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
32888 return TARGET_64BIT;
32895 /* Return true if MODE1 is accessible in a register that can hold MODE2
32896 without copying. That is, all register classes that can hold MODE2
32897 can also hold MODE1. */
32900 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
32902 if (mode1 == mode2)
32905 if (ix86_tieable_integer_mode_p (mode1)
32906 && ix86_tieable_integer_mode_p (mode2))
32909 /* MODE2 being XFmode implies fp stack or general regs, which means we
32910 can tie any smaller floating point modes to it. Note that we do not
32911 tie this with TFmode. */
32912 if (mode2 == XFmode)
32913 return mode1 == SFmode || mode1 == DFmode;
32915 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
32916 that we can tie it with SFmode. */
32917 if (mode2 == DFmode)
32918 return mode1 == SFmode;
32920 /* If MODE2 is only appropriate for an SSE register, then tie with
32921 any other mode acceptable to SSE registers. */
32922 if (GET_MODE_SIZE (mode2) == 32
32923 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
32924 return (GET_MODE_SIZE (mode1) == 32
32925 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
32926 if (GET_MODE_SIZE (mode2) == 16
32927 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
32928 return (GET_MODE_SIZE (mode1) == 16
32929 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
32931 /* If MODE2 is appropriate for an MMX register, then tie
32932 with any other mode acceptable to MMX registers. */
32933 if (GET_MODE_SIZE (mode2) == 8
32934 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
32935 return (GET_MODE_SIZE (mode1) == 8
32936 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
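/* So, for example, V4SFmode and V2DImode are tieable (both are
   16-byte SSE modes), while a 16-byte mode never ties with an 8-byte
   MMX mode, since the size checks above never mix the two.  */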
32941 /* Return the cost of moving between two registers of mode MODE. */
32944 ix86_set_reg_reg_cost (enum machine_mode mode)
32946 unsigned int units = UNITS_PER_WORD;
32948 switch (GET_MODE_CLASS (mode))
32954 units = GET_MODE_SIZE (CCmode);
32958 if ((TARGET_SSE && mode == TFmode)
32959 || (TARGET_80387 && mode == XFmode)
32960 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
32961 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
32962 units = GET_MODE_SIZE (mode);
32965 case MODE_COMPLEX_FLOAT:
32966 if ((TARGET_SSE && mode == TCmode)
32967 || (TARGET_80387 && mode == XCmode)
32968 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
32969 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
32970 units = GET_MODE_SIZE (mode);
32973 case MODE_VECTOR_INT:
32974 case MODE_VECTOR_FLOAT:
32975 if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
32976 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
32977 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
32978 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
32979 units = GET_MODE_SIZE (mode);
32982 /* Return the cost of moving between two registers of mode MODE,
32983 assuming that the move will be in pieces of at most UNITS bytes. */
32984 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
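/* E.g. a DImode copy on a 32-bit target with no suitable vector unit
   keeps units == UNITS_PER_WORD == 4, so the formula above yields
   COSTS_N_INSNS (2): two 32-bit moves.  */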
32987 /* Compute a (partial) cost for rtx X. Return true if the complete
32988 cost has been computed, and false if subexpressions should be
32989 scanned. In either case, *TOTAL contains the cost result. */
32992 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
32995 enum rtx_code code = (enum rtx_code) code_i;
32996 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
32997 enum machine_mode mode = GET_MODE (x);
32998 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
33003 if (register_operand (SET_DEST (x), VOIDmode)
33004 && reg_or_0_operand (SET_SRC (x), VOIDmode))
33006 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
33015 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
33017 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
33019 else if (flag_pic && SYMBOLIC_CONST (x)
|| (GET_CODE (x) != LABEL_REF
33022 && (GET_CODE (x) != SYMBOL_REF
33023 || !SYMBOL_REF_LOCAL_P (x)))))
33030 if (mode == VOIDmode)
33035 switch (standard_80387_constant_p (x))
33040 default: /* Other constants */
33047 if (SSE_FLOAT_MODE_P (mode))
33050 switch (standard_sse_constant_p (x))
33054 case 1: /* 0: xor eliminates false dependency */
33057 default: /* -1: cmp contains false dependency */
33062 /* Fall back to (MEM (SYMBOL_REF)), since that's where
33063 it'll probably end up. Add a penalty for size. */
33064 *total = (COSTS_N_INSNS (1)
33065 + (flag_pic != 0 && !TARGET_64BIT)
33066 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
/* The zero extension is often completely free on x86_64, so make
33071 it as cheap as possible. */
33072 if (TARGET_64BIT && mode == DImode
33073 && GET_MODE (XEXP (x, 0)) == SImode)
33075 else if (TARGET_ZERO_EXTEND_WITH_AND)
33076 *total = cost->add;
33078 *total = cost->movzx;
33082 *total = cost->movsx;
33086 if (SCALAR_INT_MODE_P (mode)
33087 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
33088 && CONST_INT_P (XEXP (x, 1)))
33090 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
33093 *total = cost->add;
33096 if ((value == 2 || value == 3)
33097 && cost->lea <= cost->shift_const)
33099 *total = cost->lea;
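/* E.g. (ashift:SI (reg) (const_int 2)) can be emitted as a single
   LEA, "leal 0(,%reg,4), %reg", so prefer LEA whenever it is no
   dearer than a constant shift.  */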
33109 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
33111 /* ??? Should be SSE vector operation cost. */
33112 /* At least for published AMD latencies, this really is the same
33113 as the latency for a simple fpu operation like fabs. */
33114 /* V*QImode is emulated with 1-11 insns. */
33115 if (mode == V16QImode || mode == V32QImode)
33118 if (TARGET_XOP && mode == V16QImode)
33120 /* For XOP we use vpshab, which requires a broadcast of the
33121 value to the variable shift insn. For constants this
means a V16QImode constant in memory; even when we can perform the
shift with one insn, set the cost to prefer PADDB.  */
33124 if (CONSTANT_P (XEXP (x, 1)))
33126 *total = (cost->fabs
33127 + rtx_cost (XEXP (x, 0), code, 0, speed)
33128 + (speed ? 2 : COSTS_N_BYTES (16)));
33133 else if (TARGET_SSSE3)
33135 *total = cost->fabs * count;
33138 *total = cost->fabs;
33140 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
33142 if (CONST_INT_P (XEXP (x, 1)))
33144 if (INTVAL (XEXP (x, 1)) > 32)
33145 *total = cost->shift_const + COSTS_N_INSNS (2);
33147 *total = cost->shift_const * 2;
33151 if (GET_CODE (XEXP (x, 1)) == AND)
33152 *total = cost->shift_var * 2;
33154 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
33159 if (CONST_INT_P (XEXP (x, 1)))
33160 *total = cost->shift_const;
33162 *total = cost->shift_var;
33170 gcc_assert (FLOAT_MODE_P (mode));
33171 gcc_assert (TARGET_FMA || TARGET_FMA4);
33173 /* ??? SSE scalar/vector cost should be used here. */
33174 /* ??? Bald assumption that fma has the same cost as fmul. */
33175 *total = cost->fmul;
33176 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
33178 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
33180 if (GET_CODE (sub) == NEG)
33181 sub = XEXP (sub, 0);
33182 *total += rtx_cost (sub, FMA, 0, speed);
33185 if (GET_CODE (sub) == NEG)
33186 sub = XEXP (sub, 0);
33187 *total += rtx_cost (sub, FMA, 2, speed);
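/* E.g. (fma (neg a) b c) corresponds to the FNMADD form, so the
   negation costs nothing extra: only the operand under the NEG is
   costed above.  */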
33192 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
33194 /* ??? SSE scalar cost should be used here. */
33195 *total = cost->fmul;
33198 else if (X87_FLOAT_MODE_P (mode))
33200 *total = cost->fmul;
33203 else if (FLOAT_MODE_P (mode))
33205 /* ??? SSE vector cost should be used here. */
33206 *total = cost->fmul;
33209 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
33211 /* V*QImode is emulated with 7-13 insns. */
33212 if (mode == V16QImode || mode == V32QImode)
33215 if (TARGET_XOP && mode == V16QImode)
33217 else if (TARGET_SSSE3)
33219 *total = cost->fmul * 2 + cost->fabs * extra;
33221 /* V*DImode is emulated with 5-8 insns. */
33222 else if (mode == V2DImode || mode == V4DImode)
33224 if (TARGET_XOP && mode == V2DImode)
33225 *total = cost->fmul * 2 + cost->fabs * 3;
33227 *total = cost->fmul * 3 + cost->fabs * 5;
33229 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
33230 insns, including two PMULUDQ. */
33231 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
33232 *total = cost->fmul * 2 + cost->fabs * 5;
33234 *total = cost->fmul;
33239 rtx op0 = XEXP (x, 0);
33240 rtx op1 = XEXP (x, 1);
33242 if (CONST_INT_P (XEXP (x, 1)))
33244 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
33245 for (nbits = 0; value != 0; value &= value - 1)
33249 /* This is arbitrary. */
33252 /* Compute costs correctly for widening multiplication. */
33253 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
33254 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
33255 == GET_MODE_SIZE (mode))
33257 int is_mulwiden = 0;
33258 enum machine_mode inner_mode = GET_MODE (op0);
33260 if (GET_CODE (op0) == GET_CODE (op1))
33261 is_mulwiden = 1, op1 = XEXP (op1, 0);
33262 else if (CONST_INT_P (op1))
33264 if (GET_CODE (op0) == SIGN_EXTEND)
33265 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
33268 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
33272 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
33275 *total = (cost->mult_init[MODE_INDEX (mode)]
33276 + nbits * cost->mult_bit
33277 + rtx_cost (op0, outer_code, opno, speed)
33278 + rtx_cost (op1, outer_code, opno, speed));
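/* E.g. a multiplication by 5 (binary 101, so nbits == 2) is costed
   as mult_init[MODE_INDEX (mode)] + 2 * mult_bit plus the costs of
   the two operands.  */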
33287 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
33288 /* ??? SSE cost should be used here. */
33289 *total = cost->fdiv;
33290 else if (X87_FLOAT_MODE_P (mode))
33291 *total = cost->fdiv;
33292 else if (FLOAT_MODE_P (mode))
33293 /* ??? SSE vector cost should be used here. */
33294 *total = cost->fdiv;
33296 *total = cost->divide[MODE_INDEX (mode)];
33300 if (GET_MODE_CLASS (mode) == MODE_INT
33301 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
33303 if (GET_CODE (XEXP (x, 0)) == PLUS
33304 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
33305 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
33306 && CONSTANT_P (XEXP (x, 1)))
33308 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
33309 if (val == 2 || val == 4 || val == 8)
33311 *total = cost->lea;
33312 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
33313 outer_code, opno, speed);
33314 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
33315 outer_code, opno, speed);
33316 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
33320 else if (GET_CODE (XEXP (x, 0)) == MULT
33321 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
33323 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
33324 if (val == 2 || val == 4 || val == 8)
33326 *total = cost->lea;
33327 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
33328 outer_code, opno, speed);
33329 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
33333 else if (GET_CODE (XEXP (x, 0)) == PLUS)
33335 *total = cost->lea;
33336 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
33337 outer_code, opno, speed);
33338 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
33339 outer_code, opno, speed);
33340 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
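/* Each shape handled above is a single LEA; e.g.
   (plus (plus (mult %ebx 4) %eax) (const_int 8)) is simply
   "leal 8(%eax,%ebx,4), ..." on IA-32.  */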
33347 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
33349 /* ??? SSE cost should be used here. */
33350 *total = cost->fadd;
33353 else if (X87_FLOAT_MODE_P (mode))
33355 *total = cost->fadd;
33358 else if (FLOAT_MODE_P (mode))
33360 /* ??? SSE vector cost should be used here. */
33361 *total = cost->fadd;
33369 if (GET_MODE_CLASS (mode) == MODE_INT
33370 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
33372 *total = (cost->add * 2
33373 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
33374 << (GET_MODE (XEXP (x, 0)) != DImode))
33375 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
33376 << (GET_MODE (XEXP (x, 1)) != DImode)));
33382 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
33384 /* ??? SSE cost should be used here. */
33385 *total = cost->fchs;
33388 else if (X87_FLOAT_MODE_P (mode))
33390 *total = cost->fchs;
33393 else if (FLOAT_MODE_P (mode))
33395 /* ??? SSE vector cost should be used here. */
33396 *total = cost->fchs;
33402 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
33404 /* ??? Should be SSE vector operation cost. */
33405 /* At least for published AMD latencies, this really is the same
33406 as the latency for a simple fpu operation like fabs. */
33407 *total = cost->fabs;
33409 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
33410 *total = cost->add * 2;
33412 *total = cost->add;
33416 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
33417 && XEXP (XEXP (x, 0), 1) == const1_rtx
33418 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
33419 && XEXP (x, 1) == const0_rtx)
33421 /* This kind of construct is implemented using test[bwl].
33422 Treat it as if we had an AND. */
33423 *total = (cost->add
33424 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
33425 + rtx_cost (const1_rtx, outer_code, opno, speed));
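/* E.g. comparing (zero_extract (reg) (const_int 1) (const_int 5))
   against zero is just "testl $32, %reg", hence the AND-like cost.  */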
33431 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
33436 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
33437 /* ??? SSE cost should be used here. */
33438 *total = cost->fabs;
33439 else if (X87_FLOAT_MODE_P (mode))
33440 *total = cost->fabs;
33441 else if (FLOAT_MODE_P (mode))
33442 /* ??? SSE vector cost should be used here. */
33443 *total = cost->fabs;
33447 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
33448 /* ??? SSE cost should be used here. */
33449 *total = cost->fsqrt;
33450 else if (X87_FLOAT_MODE_P (mode))
33451 *total = cost->fsqrt;
33452 else if (FLOAT_MODE_P (mode))
33453 /* ??? SSE vector cost should be used here. */
33454 *total = cost->fsqrt;
33458 if (XINT (x, 1) == UNSPEC_TP)
33465 case VEC_DUPLICATE:
33466 /* ??? Assume all of these vector manipulation patterns are
recognizable, in which case they all pretty much have the same cost.  */
33469 *total = cost->fabs;
33479 static int current_machopic_label_num;
33481 /* Given a symbol name and its associated stub, write out the
33482 definition of the stub. */
33485 machopic_output_stub (FILE *file, const char *symb, const char *stub)
33487 unsigned int length;
33488 char *binder_name, *symbol_name, lazy_ptr_name[32];
33489 int label = ++current_machopic_label_num;
33491 /* For 64-bit we shouldn't get here. */
33492 gcc_assert (!TARGET_64BIT);
33494 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33495 symb = targetm.strip_name_encoding (symb);
33497 length = strlen (stub);
33498 binder_name = XALLOCAVEC (char, length + 32);
33499 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
33501 length = strlen (symb);
33502 symbol_name = XALLOCAVEC (char, length + 32);
33503 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
33505 sprintf (lazy_ptr_name, "L%d$lz", label);
33507 if (MACHOPIC_ATT_STUB)
33508 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
33509 else if (MACHOPIC_PURE)
33510 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
33512 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
33514 fprintf (file, "%s:\n", stub);
33515 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33517 if (MACHOPIC_ATT_STUB)
33519 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
33521 else if (MACHOPIC_PURE)
33524 /* 25-byte PIC stub using "CALL get_pc_thunk". */
33525 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
33526 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
33527 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
33528 label, lazy_ptr_name, label);
33529 fprintf (file, "\tjmp\t*%%ecx\n");
33532 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
33534 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
33535 it needs no stub-binding-helper. */
33536 if (MACHOPIC_ATT_STUB)
33539 fprintf (file, "%s:\n", binder_name);
33543 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
33544 fprintf (file, "\tpushl\t%%ecx\n");
33547 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
33549 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
33551 /* N.B. Keep the correspondence of these
33552 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
33553 old-pic/new-pic/non-pic stubs; altering this will break
33554 compatibility with existing dylibs. */
33557 /* 25-byte PIC stub using "CALL get_pc_thunk". */
33558 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
33561 /* 16-byte -mdynamic-no-pic stub. */
switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
33564 fprintf (file, "%s:\n", lazy_ptr_name);
33565 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33566 fprintf (file, ASM_LONG "%s\n", binder_name);
33568 #endif /* TARGET_MACHO */
/* Order the registers for the register allocator.  */
33573 x86_order_regs_for_local_alloc (void)
33578 /* First allocate the local general purpose registers. */
33579 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
33580 if (GENERAL_REGNO_P (i) && call_used_regs[i])
33581 reg_alloc_order [pos++] = i;
33583 /* Global general purpose registers. */
33584 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
33585 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
33586 reg_alloc_order [pos++] = i;
/* x87 registers come first in case we are doing FP math using them.  */
33590 if (!TARGET_SSE_MATH)
33591 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
33592 reg_alloc_order [pos++] = i;
33594 /* SSE registers. */
33595 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
33596 reg_alloc_order [pos++] = i;
33597 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
33598 reg_alloc_order [pos++] = i;
33600 /* x87 registers. */
33601 if (TARGET_SSE_MATH)
33602 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
33603 reg_alloc_order [pos++] = i;
33605 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
33606 reg_alloc_order [pos++] = i;
/* Initialize the rest of the array, as we do not allocate some registers at all.  */
33610 while (pos < FIRST_PSEUDO_REGISTER)
33611 reg_alloc_order [pos++] = 0;
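/* On IA-32, for example, the call-clobbered %eax, %edx and %ecx end up
   ahead of the call-saved %ebx, %esi and %edi, so short-lived values
   prefer registers that need no save/restore around calls.  */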
33614 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
in struct attribute_spec.handler.  */
33617 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
33619 int flags ATTRIBUTE_UNUSED,
33620 bool *no_add_attrs)
33622 if (TREE_CODE (*node) != FUNCTION_TYPE
33623 && TREE_CODE (*node) != METHOD_TYPE
33624 && TREE_CODE (*node) != FIELD_DECL
33625 && TREE_CODE (*node) != TYPE_DECL)
33627 warning (OPT_Wattributes, "%qE attribute only applies to functions",
33629 *no_add_attrs = true;
33634 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
33636 *no_add_attrs = true;
33639 if (is_attribute_p ("callee_pop_aggregate_return", name))
33643 cst = TREE_VALUE (args);
33644 if (TREE_CODE (cst) != INTEGER_CST)
33646 warning (OPT_Wattributes,
33647 "%qE attribute requires an integer constant argument",
33649 *no_add_attrs = true;
33651 else if (compare_tree_int (cst, 0) != 0
33652 && compare_tree_int (cst, 1) != 0)
33654 warning (OPT_Wattributes,
33655 "argument to %qE attribute is neither zero, nor one",
33657 *no_add_attrs = true;
33666 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
33667 struct attribute_spec.handler. */
33669 ix86_handle_abi_attribute (tree *node, tree name,
33670 tree args ATTRIBUTE_UNUSED,
33671 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
33673 if (TREE_CODE (*node) != FUNCTION_TYPE
33674 && TREE_CODE (*node) != METHOD_TYPE
33675 && TREE_CODE (*node) != FIELD_DECL
33676 && TREE_CODE (*node) != TYPE_DECL)
33678 warning (OPT_Wattributes, "%qE attribute only applies to functions",
33680 *no_add_attrs = true;
33684 /* Can combine regparm with all attributes but fastcall. */
33685 if (is_attribute_p ("ms_abi", name))
33687 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
33689 error ("ms_abi and sysv_abi attributes are not compatible");
33694 else if (is_attribute_p ("sysv_abi", name))
33696 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
33698 error ("ms_abi and sysv_abi attributes are not compatible");
33707 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
33708 struct attribute_spec.handler. */
33710 ix86_handle_struct_attribute (tree *node, tree name,
33711 tree args ATTRIBUTE_UNUSED,
33712 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
33715 if (DECL_P (*node))
33717 if (TREE_CODE (*node) == TYPE_DECL)
33718 type = &TREE_TYPE (*node);
33723 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
33725 warning (OPT_Wattributes, "%qE attribute ignored",
33727 *no_add_attrs = true;
33730 else if ((is_attribute_p ("ms_struct", name)
33731 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
33732 || ((is_attribute_p ("gcc_struct", name)
33733 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
33735 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
33737 *no_add_attrs = true;
33744 ix86_handle_fndecl_attribute (tree *node, tree name,
33745 tree args ATTRIBUTE_UNUSED,
33746 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
33748 if (TREE_CODE (*node) != FUNCTION_DECL)
33750 warning (OPT_Wattributes, "%qE attribute only applies to functions",
33752 *no_add_attrs = true;
33758 ix86_ms_bitfield_layout_p (const_tree record_type)
33760 return ((TARGET_MS_BITFIELD_LAYOUT
33761 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
33762 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
33765 /* Returns an expression indicating where the this parameter is
33766 located on entry to the FUNCTION. */
33769 x86_this_parameter (tree function)
33771 tree type = TREE_TYPE (function);
33772 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
33777 const int *parm_regs;
33779 if (ix86_function_type_abi (type) == MS_ABI)
33780 parm_regs = x86_64_ms_abi_int_parameter_registers;
33782 parm_regs = x86_64_int_parameter_registers;
33783 return gen_rtx_REG (Pmode, parm_regs[aggr]);
33786 nregs = ix86_function_regparm (type, function);
33788 if (nregs > 0 && !stdarg_p (type))
33791 unsigned int ccvt = ix86_get_callcvt (type);
33793 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
33794 regno = aggr ? DX_REG : CX_REG;
33795 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
33799 return gen_rtx_MEM (SImode,
33800 plus_constant (Pmode, stack_pointer_rtx, 4));
33809 return gen_rtx_MEM (SImode,
33810 plus_constant (Pmode,
33811 stack_pointer_rtx, 4));
33814 return gen_rtx_REG (SImode, regno);
33817 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
33821 /* Determine whether x86_output_mi_thunk can succeed. */
33824 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
33825 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
33826 HOST_WIDE_INT vcall_offset, const_tree function)
33828 /* 64-bit can handle anything. */
33832 /* For 32-bit, everything's fine if we have one free register. */
33833 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
33836 /* Need a free register for vcall_offset. */
33840 /* Need a free register for GOT references. */
33841 if (flag_pic && !targetm.binds_local_p (function))
33844 /* Otherwise ok. */
33848 /* Output the assembler code for a thunk function. THUNK_DECL is the
33849 declaration for the thunk function itself, FUNCTION is the decl for
33850 the target function. DELTA is an immediate constant offset to be
33851 added to THIS. If VCALL_OFFSET is nonzero, the word at
33852 *(*this + vcall_offset) should be added to THIS. */
33855 x86_output_mi_thunk (FILE *file,
33856 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
33857 HOST_WIDE_INT vcall_offset, tree function)
33859 rtx this_param = x86_this_parameter (function);
33860 rtx this_reg, tmp, fnaddr;
33861 unsigned int tmp_regno;
33864 tmp_regno = R10_REG;
33867 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
33868 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
33869 tmp_regno = AX_REG;
33871 tmp_regno = CX_REG;
33874 emit_note (NOTE_INSN_PROLOGUE_END);
33876 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
33877 pull it in now and let DELTA benefit. */
33878 if (REG_P (this_param))
33879 this_reg = this_param;
33880 else if (vcall_offset)
33882 /* Put the this parameter into %eax. */
33883 this_reg = gen_rtx_REG (Pmode, AX_REG);
33884 emit_move_insn (this_reg, this_param);
33887 this_reg = NULL_RTX;
33889 /* Adjust the this parameter by a fixed constant. */
33892 rtx delta_rtx = GEN_INT (delta);
33893 rtx delta_dst = this_reg ? this_reg : this_param;
33897 if (!x86_64_general_operand (delta_rtx, Pmode))
33899 tmp = gen_rtx_REG (Pmode, tmp_regno);
33900 emit_move_insn (tmp, delta_rtx);
33905 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
33908 /* Adjust the this parameter by a value stored in the vtable. */
33911 rtx vcall_addr, vcall_mem, this_mem;
33913 tmp = gen_rtx_REG (Pmode, tmp_regno);
33915 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
33916 if (Pmode != ptr_mode)
33917 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
33918 emit_move_insn (tmp, this_mem);
33920 /* Adjust the this parameter. */
33921 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
33923 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
33925 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
33926 emit_move_insn (tmp2, GEN_INT (vcall_offset));
33927 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
33930 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
33931 if (Pmode != ptr_mode)
33932 emit_insn (gen_addsi_1_zext (this_reg,
33933 gen_rtx_REG (ptr_mode,
33937 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
33940 /* If necessary, drop THIS back to its stack slot. */
33941 if (this_reg && this_reg != this_param)
33942 emit_move_insn (this_param, this_reg);
33944 fnaddr = XEXP (DECL_RTL (function), 0);
33947 if (!flag_pic || targetm.binds_local_p (function)
33948 || cfun->machine->call_abi == MS_ABI)
33952 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
33953 tmp = gen_rtx_CONST (Pmode, tmp);
33954 fnaddr = gen_rtx_MEM (Pmode, tmp);
33959 if (!flag_pic || targetm.binds_local_p (function))
33962 else if (TARGET_MACHO)
33964 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
33965 fnaddr = XEXP (fnaddr, 0);
33967 #endif /* TARGET_MACHO */
33970 tmp = gen_rtx_REG (Pmode, CX_REG);
33971 output_set_got (tmp, NULL_RTX);
33973 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
33974 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
33975 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
33979 /* Our sibling call patterns do not allow memories, because we have no
33980 predicate that can distinguish between frame and non-frame memory.
33981 For our purposes here, we can get away with (ab)using a jump pattern,
33982 because we're going to do no optimization. */
33983 if (MEM_P (fnaddr))
33984 emit_jump_insn (gen_indirect_jump (fnaddr));
33987 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
33988 fnaddr = legitimize_pic_address (fnaddr,
33989 gen_rtx_REG (Pmode, tmp_regno));
33991 if (!sibcall_insn_operand (fnaddr, word_mode))
33993 tmp = gen_rtx_REG (word_mode, tmp_regno);
33994 if (GET_MODE (fnaddr) != word_mode)
33995 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
33996 emit_move_insn (tmp, fnaddr);
34000 tmp = gen_rtx_MEM (QImode, fnaddr);
34001 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
34002 tmp = emit_call_insn (tmp);
34003 SIBLING_CALL_P (tmp) = 1;
34007 /* Emit just enough of rest_of_compilation to get the insns emitted.
34008 Note that use_thunk calls assemble_start_function et al. */
34009 tmp = get_insns ();
34010 shorten_branches (tmp);
34011 final_start_function (tmp, file, 1);
34012 final (tmp, file, 1);
34013 final_end_function ();
34017 x86_file_start (void)
34019 default_file_start ();
34021 darwin_file_start ();
34023 if (X86_FILE_START_VERSION_DIRECTIVE)
34024 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
34025 if (X86_FILE_START_FLTUSED)
34026 fputs ("\t.global\t__fltused\n", asm_out_file);
34027 if (ix86_asm_dialect == ASM_INTEL)
34028 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
34032 x86_field_alignment (tree field, int computed)
34034 enum machine_mode mode;
34035 tree type = TREE_TYPE (field);
34037 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
34039 mode = TYPE_MODE (strip_array_types (type));
34040 if (mode == DFmode || mode == DCmode
34041 || GET_MODE_CLASS (mode) == MODE_INT
34042 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
34043 return MIN (32, computed);
34047 /* Output assembler code to FILE to increment profiler label # LABELNO
34048 for profiling a function entry. */
34050 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
34052 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
34057 #ifndef NO_PROFILE_COUNTERS
34058 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
34061 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
34062 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
34064 fprintf (file, "\tcall\t%s\n", mcount_name);
34068 #ifndef NO_PROFILE_COUNTERS
34069 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
34072 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
34076 #ifndef NO_PROFILE_COUNTERS
34077 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
34080 fprintf (file, "\tcall\t%s\n", mcount_name);
34084 /* We don't have exact information about the insn sizes, but we may assume
34085 quite safely that we are informed about all 1 byte insns and memory
address sizes.  This is enough to eliminate unnecessary padding in
99% of cases.  */
34090 min_insn_size (rtx insn)
34094 if (!INSN_P (insn) || !active_insn_p (insn))
/* Discard alignments we've emitted, and jump table data.  */
34098 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
34099 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
34101 if (JUMP_TABLE_DATA_P (insn))
34104 /* Important case - calls are always 5 bytes.
It is common to have many calls in a row.  */
34107 && symbolic_reference_mentioned_p (PATTERN (insn))
34108 && !SIBLING_CALL_P (insn))
34110 len = get_attr_length (insn);
34114 /* For normal instructions we rely on get_attr_length being exact,
34115 with a few exceptions. */
34116 if (!JUMP_P (insn))
34118 enum attr_type type = get_attr_type (insn);
34123 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
34124 || asm_noperands (PATTERN (insn)) >= 0)
34131 /* Otherwise trust get_attr_length. */
34135 l = get_attr_length_address (insn);
34136 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
34145 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in
   a 16-byte window.  */
34151 ix86_avoid_jump_mispredicts (void)
34153 rtx insn, start = get_insns ();
34154 int nbytes = 0, njumps = 0;
34157 /* Look for all minimal intervals of instructions containing 4 jumps.
34158 The intervals are bounded by START and INSN. NBYTES is the total
34159 size of instructions in the interval including INSN and not including
START.  When NBYTES is smaller than 16, it is possible that START
and INSN end up in the same 16-byte window.

The smallest offset at which INSN can start is when START ends at
offset 0; the offset of INSN is then NBYTES - sizeof (INSN).  We add
a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).  */
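/* For instance, with four jumps packed into an interval of NBYTES ==
   12 and a 2-byte INSN, the loop below emits a pad of 15 - 12 + 2 ==
   5 bytes, pushing INSN into the next 16-byte window.  */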
34167 for (insn = start; insn; insn = NEXT_INSN (insn))
34171 if (LABEL_P (insn))
34173 int align = label_to_alignment (insn);
34174 int max_skip = label_to_max_skip (insn);
/* If align > 3, only up to 16 - max_skip - 1 bytes can already be in
   the current 16-byte window, because otherwise
   ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer bytes to
   reach the 16-byte boundary.  */
34183 || (align <= 3 && max_skip != (1 << align) - 1))
34186 fprintf (dump_file, "Label %i with max_skip %i\n",
34187 INSN_UID (insn), max_skip);
34190 while (nbytes + max_skip >= 16)
34192 start = NEXT_INSN (start);
34193 if ((JUMP_P (start)
34194 && GET_CODE (PATTERN (start)) != ADDR_VEC
34195 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
34197 njumps--, isjump = 1;
34200 nbytes -= min_insn_size (start);
34206 min_size = min_insn_size (insn);
34207 nbytes += min_size;
34209 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
34210 INSN_UID (insn), min_size);
34212 && GET_CODE (PATTERN (insn)) != ADDR_VEC
34213 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
34221 start = NEXT_INSN (start);
34222 if ((JUMP_P (start)
34223 && GET_CODE (PATTERN (start)) != ADDR_VEC
34224 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
34226 njumps--, isjump = 1;
34229 nbytes -= min_insn_size (start);
34231 gcc_assert (njumps >= 0);
34233 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
34234 INSN_UID (start), INSN_UID (insn), nbytes);
34236 if (njumps == 3 && isjump && nbytes < 16)
34238 int padsize = 15 - nbytes + min_insn_size (insn);
34241 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
34242 INSN_UID (insn), padsize);
34243 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
/* AMD Athlon works faster when RET is not the destination of a
   conditional jump and is not directly preceded by another jump
   instruction.  We avoid the penalty by inserting a NOP just before
   the RET instructions in such cases.  */
34254 ix86_pad_returns (void)
34259 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
34261 basic_block bb = e->src;
34262 rtx ret = BB_END (bb);
34264 bool replace = false;
34266 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
34267 || optimize_bb_for_size_p (bb))
34269 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
34270 if (active_insn_p (prev) || LABEL_P (prev))
34272 if (prev && LABEL_P (prev))
34277 FOR_EACH_EDGE (e, ei, bb->preds)
34278 if (EDGE_FREQUENCY (e) && e->src->index >= 0
34279 && !(e->flags & EDGE_FALLTHRU))
34284 prev = prev_active_insn (ret);
34286 && ((JUMP_P (prev) && any_condjump_p (prev))
/* Empty functions get a branch mispredict even when
   the jump destination is not visible to us.  */
34291 if (!prev && !optimize_function_for_size_p (cfun))
34296 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
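/* The "long" return pattern presumably expands to the 2-byte
   "rep; ret" encoding, which sidesteps the predictor penalty
   without changing semantics.  */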
34302 /* Count the minimum number of instructions in BB. Return 4 if the
34303 number of instructions >= 4. */
34306 ix86_count_insn_bb (basic_block bb)
34309 int insn_count = 0;
34311 /* Count number of instructions in this block. Return 4 if the number
34312 of instructions >= 4. */
34313 FOR_BB_INSNS (bb, insn)
/* This only happens in exit blocks.  */
34317 && ANY_RETURN_P (PATTERN (insn)))
34320 if (NONDEBUG_INSN_P (insn)
34321 && GET_CODE (PATTERN (insn)) != USE
34322 && GET_CODE (PATTERN (insn)) != CLOBBER)
34325 if (insn_count >= 4)
34334 /* Count the minimum number of instructions in code path in BB.
34335 Return 4 if the number of instructions >= 4. */
34338 ix86_count_insn (basic_block bb)
34342 int min_prev_count;
34344 /* Only bother counting instructions along paths with no
34345 more than 2 basic blocks between entry and exit. Given
34346 that BB has an edge to exit, determine if a predecessor
34347 of BB has an edge from entry. If so, compute the number
34348 of instructions in the predecessor block. If there
34349 happen to be multiple such blocks, compute the minimum. */
34350 min_prev_count = 4;
34351 FOR_EACH_EDGE (e, ei, bb->preds)
34354 edge_iterator prev_ei;
34356 if (e->src == ENTRY_BLOCK_PTR)
34358 min_prev_count = 0;
34361 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
34363 if (prev_e->src == ENTRY_BLOCK_PTR)
34365 int count = ix86_count_insn_bb (e->src);
34366 if (count < min_prev_count)
34367 min_prev_count = count;
34373 if (min_prev_count < 4)
34374 min_prev_count += ix86_count_insn_bb (bb);
34376 return min_prev_count;
/* Pad short functions to 4 instructions.  */
34382 ix86_pad_short_function (void)
34387 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
34389 rtx ret = BB_END (e->src);
34390 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
34392 int insn_count = ix86_count_insn (e->src);
34394 /* Pad short function. */
34395 if (insn_count < 4)
34399 /* Find epilogue. */
34402 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
34403 insn = PREV_INSN (insn);
34408 /* Two NOPs count as one instruction. */
34409 insn_count = 2 * (4 - insn_count);
34410 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
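/* E.g. a three-instruction function gets insn_count ==
   2 * (4 - 3) == 2, i.e. two NOPs, which together count as the one
   missing instruction.  */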
/* Implement machine-specific optimizations.  We implement padding of
   returns for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte
   window.  */
34421 /* We are freeing block_for_insn in the toplev to keep compatibility
34422 with old MDEP_REORGS that are not CFG based. Recompute it now. */
34423 compute_bb_for_insn ();
34425 /* Run the vzeroupper optimization if needed. */
34426 if (TARGET_VZEROUPPER)
34427 move_or_delete_vzeroupper ();
34429 if (optimize && optimize_function_for_speed_p (cfun))
34431 if (TARGET_PAD_SHORT_FUNCTION)
34432 ix86_pad_short_function ();
34433 else if (TARGET_PAD_RETURNS)
34434 ix86_pad_returns ();
34435 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
34436 if (TARGET_FOUR_JUMP_LIMIT)
34437 ix86_avoid_jump_mispredicts ();
/* Return nonzero when a QImode register that must be represented via
   a REX prefix is used.  */
34445 x86_extended_QIreg_mentioned_p (rtx insn)
34448 extract_insn_cached (insn);
34449 for (i = 0; i < recog_data.n_operands; i++)
34450 if (GENERAL_REG_P (recog_data.operand[i])
34451 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
34459 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
34461 unsigned int regno;
34464 regno = REGNO (*p);
34465 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
/* Return true when INSN mentions a register that must be encoded
   using a REX prefix.  */
34471 x86_extended_reg_mentioned_p (rtx insn)
34473 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
34474 extended_reg_mentioned_1, NULL);
34477 /* If profitable, negate (without causing overflow) integer constant
34478 of mode MODE at location LOC. Return true in this case. */
34480 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
34484 if (!CONST_INT_P (*loc))
34490 /* DImode x86_64 constants must fit in 32 bits. */
34491 gcc_assert (x86_64_immediate_operand (*loc, mode));
34502 gcc_unreachable ();
34505 /* Avoid overflows. */
34506 if (mode_signbit_p (mode, *loc))
34509 val = INTVAL (*loc);
/* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
   Exception: -128 encodes smaller than 128, so swap the sign and the
   operation.  */
34513 if ((val < 0 && val != -128)
34516 *loc = GEN_INT (-val);
34523 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
34524 optabs would emit if we didn't have TFmode patterns. */
34527 x86_emit_floatuns (rtx operands[2])
34529 rtx neglab, donelab, i0, i1, f0, in, out;
34530 enum machine_mode mode, inmode;
34532 inmode = GET_MODE (operands[1]);
34533 gcc_assert (inmode == SImode || inmode == DImode);
34536 in = force_reg (inmode, operands[1]);
34537 mode = GET_MODE (out);
34538 neglab = gen_label_rtx ();
34539 donelab = gen_label_rtx ();
34540 f0 = gen_reg_rtx (mode);
34542 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
34544 expand_float (out, in, 0);
34546 emit_jump_insn (gen_jump (donelab));
34549 emit_label (neglab);
34551 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
34553 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
34555 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
34557 expand_float (f0, i0, 0);
34559 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
34561 emit_label (donelab);
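/* The negative path above implements the usual unsigned-to-float
   trick: compute f0 = (FP) ((in >> 1) | (in & 1)) and then out =
   f0 + f0, so the low bit is not lost to rounding when halving.  */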
/* AVX2 does support 32-byte integer vector operations, so the longest
   vector we face is V32QImode.  */
34566 #define MAX_VECT_LEN 32
34568 struct expand_vec_perm_d
34570 rtx target, op0, op1;
34571 unsigned char perm[MAX_VECT_LEN];
34572 enum machine_mode vmode;
34573 unsigned char nelt;
34574 bool one_operand_p;
34578 static bool canonicalize_perm (struct expand_vec_perm_d *d);
34579 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
34580 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
34582 /* Get a vector mode of the same size as the original but with elements
34583 twice as wide. This is only guaranteed to apply to integral vectors. */
34585 static inline enum machine_mode
34586 get_mode_wider_vector (enum machine_mode o)
34588 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
34589 enum machine_mode n = GET_MODE_WIDER_MODE (o);
34590 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
34591 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
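/* E.g. V8HImode widens to V4SImode: the same 16-byte size with half
   as many elements, each twice as wide.  */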
34595 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
34596 with all elements equal to VAR. Return true if successful. */
34599 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
34600 rtx target, rtx val)
34623 /* First attempt to recognize VAL as-is. */
34624 dup = gen_rtx_VEC_DUPLICATE (mode, val);
34625 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
34626 if (recog_memoized (insn) < 0)
34629 /* If that fails, force VAL into a register. */
34632 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
34633 seq = get_insns ();
34636 emit_insn_before (seq, insn);
34638 ok = recog_memoized (insn) >= 0;
34647 if (TARGET_SSE || TARGET_3DNOW_A)
34651 val = gen_lowpart (SImode, val);
34652 x = gen_rtx_TRUNCATE (HImode, val);
34653 x = gen_rtx_VEC_DUPLICATE (mode, x);
34654 emit_insn (gen_rtx_SET (VOIDmode, target, x));
34667 struct expand_vec_perm_d dperm;
34671 memset (&dperm, 0, sizeof (dperm));
34672 dperm.target = target;
34673 dperm.vmode = mode;
34674 dperm.nelt = GET_MODE_NUNITS (mode);
34675 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
34676 dperm.one_operand_p = true;
34678 /* Extend to SImode using a paradoxical SUBREG. */
34679 tmp1 = gen_reg_rtx (SImode);
34680 emit_move_insn (tmp1, gen_lowpart (SImode, val));
34682 /* Insert the SImode value as low element of a V4SImode vector. */
34683 tmp2 = gen_lowpart (V4SImode, dperm.op0);
34684 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
34686 ok = (expand_vec_perm_1 (&dperm)
34687 || expand_vec_perm_broadcast_1 (&dperm));
34699 /* Replicate the value once into the next wider mode and recurse. */
34701 enum machine_mode smode, wsmode, wvmode;
34704 smode = GET_MODE_INNER (mode);
34705 wvmode = get_mode_wider_vector (mode);
34706 wsmode = GET_MODE_INNER (wvmode);
34708 val = convert_modes (wsmode, smode, val, true);
34709 x = expand_simple_binop (wsmode, ASHIFT, val,
34710 GEN_INT (GET_MODE_BITSIZE (smode)),
34711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
34712 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
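/* E.g. broadcasting a QImode value V into V16QImode first forms the
   HImode value (V << 8) | V here, then recurses with V8HImode as the
   wider vector mode.  */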
34714 x = gen_lowpart (wvmode, target);
34715 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
34723 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
34724 rtx x = gen_reg_rtx (hvmode);
34726 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
34729 x = gen_rtx_VEC_CONCAT (mode, x, x);
34730 emit_insn (gen_rtx_SET (VOIDmode, target, x));
34739 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
whose ONE_VAR element is VAR and whose other elements are zero.  Return
true if successful.  */
34744 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
34745 rtx target, rtx var, int one_var)
34747 enum machine_mode vsimode;
34750 bool use_vector_set = false;
34755 /* For SSE4.1, we normally use vector set. But if the second
element is zero and inter-unit moves are OK, we use movq instead.  */
34758 use_vector_set = (TARGET_64BIT
34760 && !(TARGET_INTER_UNIT_MOVES
34766 use_vector_set = TARGET_SSE4_1;
34769 use_vector_set = TARGET_SSE2;
34772 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
34779 use_vector_set = TARGET_AVX;
34782 /* Use ix86_expand_vector_set in 64bit mode only. */
34783 use_vector_set = TARGET_AVX && TARGET_64BIT;
34789 if (use_vector_set)
34791 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
34792 var = force_reg (GET_MODE_INNER (mode), var);
34793 ix86_expand_vector_set (mmx_ok, target, var, one_var);
34809 var = force_reg (GET_MODE_INNER (mode), var);
34810 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
34811 emit_insn (gen_rtx_SET (VOIDmode, target, x));
34816 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
34817 new_target = gen_reg_rtx (mode);
34819 new_target = target;
34820 var = force_reg (GET_MODE_INNER (mode), var);
34821 x = gen_rtx_VEC_DUPLICATE (mode, var);
34822 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
34823 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
34826 /* We need to shuffle the value to the correct position, so
34827 create a new pseudo to store the intermediate result. */
34829 /* With SSE2, we can use the integer shuffle insns. */
34830 if (mode != V4SFmode && TARGET_SSE2)
34832 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
34834 GEN_INT (one_var == 1 ? 0 : 1),
34835 GEN_INT (one_var == 2 ? 0 : 1),
34836 GEN_INT (one_var == 3 ? 0 : 1)));
34837 if (target != new_target)
34838 emit_move_insn (target, new_target);
34842 /* Otherwise convert the intermediate result to V4SFmode and
34843 use the SSE1 shuffle instructions. */
34844 if (mode != V4SFmode)
34846 tmp = gen_reg_rtx (V4SFmode);
34847 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
34852 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
34854 GEN_INT (one_var == 1 ? 0 : 1),
34855 GEN_INT (one_var == 2 ? 0+4 : 1+4),
34856 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
34858 if (mode != V4SFmode)
34859 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
34860 else if (tmp != target)
34861 emit_move_insn (target, tmp);
34863 else if (target != new_target)
34864 emit_move_insn (target, new_target);
34869 vsimode = V4SImode;
34875 vsimode = V2SImode;
34881 /* Zero extend the variable element to SImode and recurse. */
34882 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
34884 x = gen_reg_rtx (vsimode);
34885 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
34887 gcc_unreachable ();
34889 emit_move_insn (target, gen_lowpart (mode, x));
34897 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
34898 consisting of the values in VALS. It is known that all elements
34899 except ONE_VAR are constants. Return true if successful. */
34902 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
34903 rtx target, rtx vals, int one_var)
34905 rtx var = XVECEXP (vals, 0, one_var);
34906 enum machine_mode wmode;
34909 const_vec = copy_rtx (vals);
34910 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
34911 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
34919 /* For the two element vectors, it's just as easy to use
34920 the general case. */
34924 /* Use ix86_expand_vector_set in 64bit mode only. */
34947 /* There's no way to set one QImode entry easily. Combine
34948 the variable value with its adjacent constant value, and
34949 promote to an HImode set. */
34950 x = XVECEXP (vals, 0, one_var ^ 1);
34953 var = convert_modes (HImode, QImode, var, true);
34954 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
34955 NULL_RTX, 1, OPTAB_LIB_WIDEN);
34956 x = GEN_INT (INTVAL (x) & 0xff);
34960 var = convert_modes (HImode, QImode, var, true);
34961 x = gen_int_mode (INTVAL (x) << 8, HImode);
34963 if (x != const0_rtx)
34964 var = expand_simple_binop (HImode, IOR, var, x, var,
34965 1, OPTAB_LIB_WIDEN);
34967 x = gen_reg_rtx (wmode);
34968 emit_move_insn (x, gen_lowpart (wmode, const_vec));
34969 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
34971 emit_move_insn (target, gen_lowpart (mode, x));
34978 emit_move_insn (target, const_vec);
34979 ix86_expand_vector_set (mmx_ok, target, var, one_var);
34983 /* A subroutine of ix86_expand_vector_init_general. Use vector
34984 concatenate to handle the most general case: all values variable,
34985 and none identical. */
34988 ix86_expand_vector_init_concat (enum machine_mode mode,
34989 rtx target, rtx *ops, int n)
34991 enum machine_mode cmode, hmode = VOIDmode;
34992 rtx first[8], second[4];
35032 gcc_unreachable ();
35035 if (!register_operand (ops[1], cmode))
35036 ops[1] = force_reg (cmode, ops[1]);
35037 if (!register_operand (ops[0], cmode))
35038 ops[0] = force_reg (cmode, ops[0]);
35039 emit_insn (gen_rtx_SET (VOIDmode, target,
35040 gen_rtx_VEC_CONCAT (mode, ops[0],
35060 gcc_unreachable ();
35076 gcc_unreachable ();
35081 /* FIXME: We process inputs backward to help RA. PR 36222. */
35084 for (; i > 0; i -= 2, j--)
35086 first[j] = gen_reg_rtx (cmode);
35087 v = gen_rtvec (2, ops[i - 1], ops[i]);
35088 ix86_expand_vector_init (false, first[j],
35089 gen_rtx_PARALLEL (cmode, v));
35095 gcc_assert (hmode != VOIDmode);
35096 for (i = j = 0; i < n; i += 2, j++)
35098 second[j] = gen_reg_rtx (hmode);
35099 ix86_expand_vector_init_concat (hmode, second [j],
35103 ix86_expand_vector_init_concat (mode, target, second, n);
35106 ix86_expand_vector_init_concat (mode, target, first, n);
35110 gcc_unreachable ();
35114 /* A subroutine of ix86_expand_vector_init_general. Use vector
35115 interleave to handle the most general case: all values variable,
35116 and none identical. */
35119 ix86_expand_vector_init_interleave (enum machine_mode mode,
35120 rtx target, rtx *ops, int n)
35122 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
35125 rtx (*gen_load_even) (rtx, rtx, rtx);
35126 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
35127 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
35132 gen_load_even = gen_vec_setv8hi;
35133 gen_interleave_first_low = gen_vec_interleave_lowv4si;
35134 gen_interleave_second_low = gen_vec_interleave_lowv2di;
35135 inner_mode = HImode;
35136 first_imode = V4SImode;
35137 second_imode = V2DImode;
35138 third_imode = VOIDmode;
35141 gen_load_even = gen_vec_setv16qi;
35142 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
35143 gen_interleave_second_low = gen_vec_interleave_lowv4si;
35144 inner_mode = QImode;
35145 first_imode = V8HImode;
35146 second_imode = V4SImode;
35147 third_imode = V2DImode;
35150 gcc_unreachable ();
35153 for (i = 0; i < n; i++)
/* Extend the odd element to SImode using a paradoxical SUBREG.  */
35156 op0 = gen_reg_rtx (SImode);
35157 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
35159 /* Insert the SImode value as low element of V4SImode vector. */
35160 op1 = gen_reg_rtx (V4SImode);
35161 op0 = gen_rtx_VEC_MERGE (V4SImode,
35162 gen_rtx_VEC_DUPLICATE (V4SImode,
35164 CONST0_RTX (V4SImode),
35166 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
/* Cast the V4SImode vector back to a vector in the original mode.  */
35169 op0 = gen_reg_rtx (mode);
35170 emit_move_insn (op0, gen_lowpart (mode, op1));
/* Load the even elements into the second position.  */
35173 emit_insn (gen_load_even (op0,
35174 force_reg (inner_mode,
35178 /* Cast vector to FIRST_IMODE vector. */
35179 ops[i] = gen_reg_rtx (first_imode);
35180 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
35183 /* Interleave low FIRST_IMODE vectors. */
35184 for (i = j = 0; i < n; i += 2, j++)
35186 op0 = gen_reg_rtx (first_imode);
35187 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
35189 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
35190 ops[j] = gen_reg_rtx (second_imode);
35191 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
35194 /* Interleave low SECOND_IMODE vectors. */
35195 switch (second_imode)
35198 for (i = j = 0; i < n / 2; i += 2, j++)
35200 op0 = gen_reg_rtx (second_imode);
35201 emit_insn (gen_interleave_second_low (op0, ops[i],
35204 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
35206 ops[j] = gen_reg_rtx (third_imode);
35207 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
35209 second_imode = V2DImode;
35210 gen_interleave_second_low = gen_vec_interleave_lowv2di;
35214 op0 = gen_reg_rtx (second_imode);
35215 emit_insn (gen_interleave_second_low (op0, ops[0],
/* Cast the SECOND_IMODE vector back to a vector of the original mode.  */
35220 emit_insn (gen_rtx_SET (VOIDmode, target,
35221 gen_lowpart (mode, op0)));
35225 gcc_unreachable ();
35229 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
35230 all values variable, and none identical. */
35233 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
35234 rtx target, rtx vals)
35236 rtx ops[32], op0, op1;
35237 enum machine_mode half_mode = VOIDmode;
35244 if (!mmx_ok && !TARGET_SSE)
35256 n = GET_MODE_NUNITS (mode);
35257 for (i = 0; i < n; i++)
35258 ops[i] = XVECEXP (vals, 0, i);
35259 ix86_expand_vector_init_concat (mode, target, ops, n);
35263 half_mode = V16QImode;
35267 half_mode = V8HImode;
35271 n = GET_MODE_NUNITS (mode);
35272 for (i = 0; i < n; i++)
35273 ops[i] = XVECEXP (vals, 0, i);
35274 op0 = gen_reg_rtx (half_mode);
35275 op1 = gen_reg_rtx (half_mode);
35276 ix86_expand_vector_init_interleave (half_mode, op0, ops,
35278 ix86_expand_vector_init_interleave (half_mode, op1,
35279 &ops [n >> 1], n >> 2);
35280 emit_insn (gen_rtx_SET (VOIDmode, target,
35281 gen_rtx_VEC_CONCAT (mode, op0, op1)));
35285 if (!TARGET_SSE4_1)
35293 /* Don't use ix86_expand_vector_init_interleave if we can't
35294 move from GPR to SSE register directly. */
35295 if (!TARGET_INTER_UNIT_MOVES)
35298 n = GET_MODE_NUNITS (mode);
35299 for (i = 0; i < n; i++)
35300 ops[i] = XVECEXP (vals, 0, i);
35301 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
35309 gcc_unreachable ();
35313 int i, j, n_elts, n_words, n_elt_per_word;
35314 enum machine_mode inner_mode;
35315 rtx words[4], shift;
35317 inner_mode = GET_MODE_INNER (mode);
35318 n_elts = GET_MODE_NUNITS (mode);
35319 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
35320 n_elt_per_word = n_elts / n_words;
35321 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
35323 for (i = 0; i < n_words; ++i)
35325 rtx word = NULL_RTX;
35327 for (j = 0; j < n_elt_per_word; ++j)
35329 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
35330 elt = convert_modes (word_mode, inner_mode, elt, true);
35336 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
35337 word, 1, OPTAB_LIB_WIDEN);
35338 word = expand_simple_binop (word_mode, IOR, word, elt,
35339 word, 1, OPTAB_LIB_WIDEN);
35347 emit_move_insn (target, gen_lowpart (mode, words[0]));
35348 else if (n_words == 2)
35350 rtx tmp = gen_reg_rtx (mode);
35351 emit_clobber (tmp);
35352 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
35353 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
35354 emit_move_insn (target, tmp);
35356 else if (n_words == 4)
35358 rtx tmp = gen_reg_rtx (V4SImode);
35359 gcc_assert (word_mode == SImode);
35360 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
35361 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
35362 emit_move_insn (target, gen_lowpart (mode, tmp));
35365 gcc_unreachable ();
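/* Editor's note: a minimal C sketch (not GCC code) of the word-building
   fallback above.  Each machine word is filled from its highest-indexed
   element down, so after the ASHIFT/IOR loop the lowest-indexed element
   sits in the low bits.  Shown for 16-bit elements in a 32-bit word.  */
#include <stdint.h>

static uint32_t
pack_word_model (const uint16_t *elt, int n_elt_per_word)
{
  uint32_t word = 0;
  int j;

  for (j = 0; j < n_elt_per_word; j++)
    /* Mirrors elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1).  */
    word = (word << 16) | elt[n_elt_per_word - j - 1];
  return word;
}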
35369 /* Initialize vector TARGET via VALS. Suppress the use of MMX
35370 instructions unless MMX_OK is true. */
35373 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
35375 enum machine_mode mode = GET_MODE (target);
35376 enum machine_mode inner_mode = GET_MODE_INNER (mode);
35377 int n_elts = GET_MODE_NUNITS (mode);
35378 int n_var = 0, one_var = -1;
35379 bool all_same = true, all_const_zero = true;
35383 for (i = 0; i < n_elts; ++i)
35385 x = XVECEXP (vals, 0, i);
35386 if (!(CONST_INT_P (x)
35387 || GET_CODE (x) == CONST_DOUBLE
35388 || GET_CODE (x) == CONST_FIXED))
35389 n_var++, one_var = i;
35390 else if (x != CONST0_RTX (inner_mode))
35391 all_const_zero = false;
35392 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
35396 /* Constants are best loaded from the constant pool. */
35399 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
35403 /* If all values are identical, broadcast the value. */
35405 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
35406 XVECEXP (vals, 0, 0)))
35409 /* Values where only one field is non-constant are best loaded from
35410 the pool and overwritten via move later. */
35414 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
35415 XVECEXP (vals, 0, one_var),
35419 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
35423 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
35427 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
35429 enum machine_mode mode = GET_MODE (target);
35430 enum machine_mode inner_mode = GET_MODE_INNER (mode);
35431 enum machine_mode half_mode;
35432 bool use_vec_merge = false;
35434 static rtx (*gen_extract[6][2]) (rtx, rtx)
35436 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
35437 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
35438 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
35439 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
35440 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
35441 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
35443 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
35445 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
35446 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
35447 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
35448 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
35449 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
35450 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
35460 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
35461 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
35463 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
35465 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
35466 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
35472 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
35476 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
35477 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
35479 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
35481 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
35482 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
35489 /* For the two element vectors, we implement a VEC_CONCAT with
35490 the extraction of the other element. */
35492 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
35493 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
35496 op0 = val, op1 = tmp;
35498 op0 = tmp, op1 = val;
35500 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
35501 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
35506 use_vec_merge = TARGET_SSE4_1;
35513 use_vec_merge = true;
35517 /* tmp = target = A B C D */
35518 tmp = copy_to_reg (target);
35519 /* target = A A B B */
35520 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
35521 /* target = X A B B */
35522 ix86_expand_vector_set (false, target, val, 0);
35523 /* target = A X C D */
35524 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
35525 const1_rtx, const0_rtx,
35526 GEN_INT (2+4), GEN_INT (3+4)));
35530 /* tmp = target = A B C D */
35531 tmp = copy_to_reg (target);
35532 /* tmp = X B C D */
35533 ix86_expand_vector_set (false, tmp, val, 0);
35534 /* target = A B X D */
35535 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
35536 const0_rtx, const1_rtx,
35537 GEN_INT (0+4), GEN_INT (3+4)));
35541 /* tmp = target = A B C D */
35542 tmp = copy_to_reg (target);
35543 /* tmp = X B C D */
35544 ix86_expand_vector_set (false, tmp, val, 0);
35545 /* target = A B X D */
35546 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
35547 const0_rtx, const1_rtx,
35548 GEN_INT (2+4), GEN_INT (0+4)));
35552 gcc_unreachable ();
35557 use_vec_merge = TARGET_SSE4_1;
35561 /* Element 0 handled by vec_merge below. */
35564 use_vec_merge = true;
35570 /* With SSE2, use integer shuffles to swap element 0 and ELT,
35571 store into element 0, then shuffle them back. */
35575 order[0] = GEN_INT (elt);
35576 order[1] = const1_rtx;
35577 order[2] = const2_rtx;
35578 order[3] = GEN_INT (3);
35579 order[elt] = const0_rtx;
35581 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
35582 order[1], order[2], order[3]));
35584 ix86_expand_vector_set (false, target, val, 0);
35586 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
35587 order[1], order[2], order[3]));
35591 /* For SSE1, we have to reuse the V4SF code. */
35592 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
35593 gen_lowpart (SFmode, val), elt);
35598 use_vec_merge = TARGET_SSE2;
35601 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
35605 use_vec_merge = TARGET_SSE4_1;
35612 half_mode = V16QImode;
35618 half_mode = V8HImode;
35624 half_mode = V4SImode;
35630 half_mode = V2DImode;
35636 half_mode = V4SFmode;
35642 half_mode = V2DFmode;
35648 /* Compute offset. */
35652 gcc_assert (i <= 1);
35654 /* Extract the half. */
35655 tmp = gen_reg_rtx (half_mode);
35656 emit_insn (gen_extract[j][i] (tmp, target));
35658 /* Put val in tmp at elt. */
35659 ix86_expand_vector_set (false, tmp, val, elt);
35662 emit_insn (gen_insert[j][i] (target, target, tmp));
35671 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
35672 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
35673 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
35677 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
35679 emit_move_insn (mem, target);
35681 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
35682 emit_move_insn (tmp, val);
35684 emit_move_insn (target, mem);
35689 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
35691 enum machine_mode mode = GET_MODE (vec);
35692 enum machine_mode inner_mode = GET_MODE_INNER (mode);
35693 bool use_vec_extr = false;
35706 use_vec_extr = true;
35710 use_vec_extr = TARGET_SSE4_1;
35722 tmp = gen_reg_rtx (mode);
35723 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
35724 GEN_INT (elt), GEN_INT (elt),
35725 GEN_INT (elt+4), GEN_INT (elt+4)));
35729 tmp = gen_reg_rtx (mode);
35730 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
35734 gcc_unreachable ();
35737 use_vec_extr = true;
35742 use_vec_extr = TARGET_SSE4_1;
35756 tmp = gen_reg_rtx (mode);
35757 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
35758 GEN_INT (elt), GEN_INT (elt),
35759 GEN_INT (elt), GEN_INT (elt)));
35763 tmp = gen_reg_rtx (mode);
35764 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
35768 gcc_unreachable ();
35771 use_vec_extr = true;
35776 /* For SSE1, we have to reuse the V4SF code. */
35777 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
35778 gen_lowpart (V4SFmode, vec), elt);
35784 use_vec_extr = TARGET_SSE2;
35787 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
35791 use_vec_extr = TARGET_SSE4_1;
35797 tmp = gen_reg_rtx (V4SFmode);
35799 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
35801 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
35802 ix86_expand_vector_extract (false, target, tmp, elt & 3);
35810 tmp = gen_reg_rtx (V2DFmode);
35812 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
35814 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
35815 ix86_expand_vector_extract (false, target, tmp, elt & 1);
35823 tmp = gen_reg_rtx (V16QImode);
35825 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
35827 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
35828 ix86_expand_vector_extract (false, target, tmp, elt & 15);
35836 tmp = gen_reg_rtx (V8HImode);
35838 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
35840 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
35841 ix86_expand_vector_extract (false, target, tmp, elt & 7);
35849 tmp = gen_reg_rtx (V4SImode);
35851 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
35853 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
35854 ix86_expand_vector_extract (false, target, tmp, elt & 3);
35862 tmp = gen_reg_rtx (V2DImode);
35864 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
35866 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
35867 ix86_expand_vector_extract (false, target, tmp, elt & 1);
35873 /* ??? Could extract the appropriate HImode element and shift. */
35880 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
35881 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
35883 /* Let the rtl optimizers know about the zero extension performed. */
35884 if (inner_mode == QImode || inner_mode == HImode)
35886 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
35887 target = gen_lowpart (SImode, target);
35890 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
35894 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
35896 emit_move_insn (mem, vec);
35898 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
35899 emit_move_insn (target, tmp);
35903 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
35904 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
35905 The upper bits of DEST are undefined, though they shouldn't cause
35906 exceptions (some bits from src or all zeros are ok). */
35909 emit_reduc_half (rtx dest, rtx src, int i)
35912 switch (GET_MODE (src))
35916 tem = gen_sse_movhlps (dest, src, src);
35918 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
35919 GEN_INT (1 + 4), GEN_INT (1 + 4));
35922 tem = gen_vec_interleave_highv2df (dest, src, src);
35928 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
35929 gen_lowpart (V1TImode, src),
35934 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
35936 tem = gen_avx_shufps256 (dest, src, src,
35937 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
35941 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
35943 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
35950 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
35951 gen_lowpart (V4DImode, src),
35952 gen_lowpart (V4DImode, src),
35955 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
35956 gen_lowpart (V2TImode, src),
35960 gcc_unreachable ();
35965 /* Expand a vector reduction. FN is the binary pattern to reduce;
35966 DEST is the destination; IN is the input vector. */
35969 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
35971 rtx half, dst, vec = in;
35972 enum machine_mode mode = GET_MODE (in);
35975 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
35977 && mode == V8HImode
35978 && fn == gen_uminv8hi3)
35980 emit_insn (gen_sse4_1_phminposuw (dest, in));
35984 for (i = GET_MODE_BITSIZE (mode);
35985 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
35988 half = gen_reg_rtx (mode);
35989 emit_reduc_half (half, vec, i);
35990 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
35993 dst = gen_reg_rtx (mode);
35994 emit_insn (fn (dst, half, vec));
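/* Editor's note: a plain-C model (not GCC code) of the halving reduction
   above.  Each emit_reduc_half step folds the upper half of the vector
   onto the lower half, so an N-element reduction needs only log2(N)
   vector operations; the final result lives in element 0.  */
#include <stdint.h>

static uint32_t
reduce_add_v8si_model (const uint32_t v[8])
{
  uint32_t t[8];
  int i, n;

  for (i = 0; i < 8; i++)
    t[i] = v[i];
  for (n = 8; n > 1; n /= 2)	/* i = 256, 128, 64 bits in the loop above */
    for (i = 0; i < n / 2; i++)
      t[i] += t[i + n / 2];	/* emit_reduc_half, then FN (here, plus) */
  return t[0];
}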
35999 /* Target hook for scalar_mode_supported_p. */
36001 ix86_scalar_mode_supported_p (enum machine_mode mode)
36003 if (DECIMAL_FLOAT_MODE_P (mode))
36004 return default_decimal_float_supported_p ();
36005 else if (mode == TFmode)
36008 return default_scalar_mode_supported_p (mode);
36011 /* Implements target hook vector_mode_supported_p. */
36013 ix86_vector_mode_supported_p (enum machine_mode mode)
36015 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
36017 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
36019 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
36021 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
36023 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
36028 /* Target hook for c_mode_for_suffix. */
36029 static enum machine_mode
36030 ix86_c_mode_for_suffix (char suffix)
36040 /* Worker function for TARGET_MD_ASM_CLOBBERS.
36042 We do this in the new i386 backend to maintain source compatibility
36043 with the old cc0-based compiler. */
36046 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
36047 tree inputs ATTRIBUTE_UNUSED,
36050 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
36052 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
36057 /* Implements the target hook targetm.encode_section_info. */
36059 static void ATTRIBUTE_UNUSED
36060 ix86_encode_section_info (tree decl, rtx rtl, int first)
36062 default_encode_section_info (decl, rtl, first);
36064 if (TREE_CODE (decl) == VAR_DECL
36065 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
36066 && ix86_in_large_data_p (decl))
36067 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
36070 /* Worker function for REVERSE_CONDITION. */
36073 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
36075 return (mode != CCFPmode && mode != CCFPUmode
36076 ? reverse_condition (code)
36077 : reverse_condition_maybe_unordered (code));
36080 /* Output code to perform an x87 FP register move, from OPERANDS[1]
36084 output_387_reg_move (rtx insn, rtx *operands)
36086 if (REG_P (operands[0]))
36088 if (REG_P (operands[1])
36089 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
36091 if (REGNO (operands[0]) == FIRST_STACK_REG)
36092 return output_387_ffreep (operands, 0);
36093 return "fstp\t%y0";
36095 if (STACK_TOP_P (operands[0]))
36096 return "fld%Z1\t%y1";
36099 else if (MEM_P (operands[0]))
36101 gcc_assert (REG_P (operands[1]));
36102 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
36103 return "fstp%Z0\t%y0";
36106 /* There is no non-popping store to memory for XFmode.
36107 So if we need one, follow the store with a load. */
36108 if (GET_MODE (operands[0]) == XFmode)
36109 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
36111 return "fst%Z0\t%y0";
36118 /* Output code to perform a conditional jump to LABEL, if C2 flag in
36119 FP status register is set. */
36122 ix86_emit_fp_unordered_jump (rtx label)
36124 rtx reg = gen_reg_rtx (HImode);
36127 emit_insn (gen_x86_fnstsw_1 (reg));
36129 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
36131 emit_insn (gen_x86_sahf_1 (reg));
36133 temp = gen_rtx_REG (CCmode, FLAGS_REG);
36134 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
36138 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
36140 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
36141 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
36144 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
36145 gen_rtx_LABEL_REF (VOIDmode, label),
36147 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
36149 emit_jump_insn (temp);
36150 predict_jump (REG_BR_PROB_BASE * 10 / 100);
36153 /* Output code to perform a log1p XFmode calculation. */
36155 void ix86_emit_i387_log1p (rtx op0, rtx op1)
36157 rtx label1 = gen_label_rtx ();
36158 rtx label2 = gen_label_rtx ();
36160 rtx tmp = gen_reg_rtx (XFmode);
36161 rtx tmp2 = gen_reg_rtx (XFmode);
36164 emit_insn (gen_absxf2 (tmp, op1));
36165 test = gen_rtx_GE (VOIDmode, tmp,
36166 CONST_DOUBLE_FROM_REAL_VALUE (
36167 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
36169 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
36171 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
36172 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
36173 emit_jump (label2);
36175 emit_label (label1);
36176 emit_move_insn (tmp, CONST1_RTX (XFmode));
36177 emit_insn (gen_addxf3 (tmp, op1, tmp));
36178 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
36179 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
36181 emit_label (label2);
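/* Editor's note: a C sketch (not GCC code) of the identity behind the
   sequence above.  fyl2xp1 computes Y * log2 (X + 1) and is accurate only
   for small |X|, hence the branch at |x| >= 1 - sqrt(2)/2 ~= 0.2928932...
   (the REAL_VALUE_ATOF constant); both arms scale by ln(2) via fldln2.  */
#include <math.h>

static double
log1p_model (double x)
{
  const double ln2 = 0.69314718055994530942;

  if (fabs (x) < 0.29289321881345247560)
    /* fyl2xp1 path: x is passed directly, avoiding the rounding error
       of forming 1.0 + x for tiny x.  */
    return ln2 * log2 (1.0 + x);
  else
    {
      double t = 1.0 + x;	/* the explicit gen_addxf3 above */
      return ln2 * log2 (t);	/* fyl2x path */
    }
}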
36184 /* Emit x87 code to compute round (OP1), storing the result into OP0. */
36185 void ix86_emit_i387_round (rtx op0, rtx op1)
36187 enum machine_mode inmode = GET_MODE (op1);
36188 enum machine_mode outmode = GET_MODE (op0);
36189 rtx e1, e2, res, tmp, tmp1, half;
36190 rtx scratch = gen_reg_rtx (HImode);
36191 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
36192 rtx jump_label = gen_label_rtx ();
36194 rtx (*gen_abs) (rtx, rtx);
36195 rtx (*gen_neg) (rtx, rtx);
36200 gen_abs = gen_abssf2;
36203 gen_abs = gen_absdf2;
36206 gen_abs = gen_absxf2;
36209 gcc_unreachable ();
36215 gen_neg = gen_negsf2;
36218 gen_neg = gen_negdf2;
36221 gen_neg = gen_negxf2;
36224 gen_neg = gen_neghi2;
36227 gen_neg = gen_negsi2;
36230 gen_neg = gen_negdi2;
36233 gcc_unreachable ();
36236 e1 = gen_reg_rtx (inmode);
36237 e2 = gen_reg_rtx (inmode);
36238 res = gen_reg_rtx (outmode);
36240 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
36242 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
36244 /* scratch = fxam(op1) */
36245 emit_insn (gen_rtx_SET (VOIDmode, scratch,
36246 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
36248 /* e1 = fabs(op1) */
36249 emit_insn (gen_abs (e1, op1));
36251 /* e2 = e1 + 0.5 */
36252 half = force_reg (inmode, half);
36253 emit_insn (gen_rtx_SET (VOIDmode, e2,
36254 gen_rtx_PLUS (inmode, e1, half)));
36256 /* res = floor(e2) */
36257 if (inmode != XFmode)
36259 tmp1 = gen_reg_rtx (XFmode);
36261 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
36262 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
36272 rtx tmp0 = gen_reg_rtx (XFmode);
36274 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
36276 emit_insn (gen_rtx_SET (VOIDmode, res,
36277 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
36278 UNSPEC_TRUNC_NOOP)));
36282 emit_insn (gen_frndintxf2_floor (res, tmp1));
36285 emit_insn (gen_lfloorxfhi2 (res, tmp1));
36288 emit_insn (gen_lfloorxfsi2 (res, tmp1));
36291 emit_insn (gen_lfloorxfdi2 (res, tmp1));
36294 gcc_unreachable ();
36297 /* flags = signbit(a) */
36298 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
36300 /* if (flags) then res = -res */
36301 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
36302 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
36303 gen_rtx_LABEL_REF (VOIDmode, jump_label),
36305 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
36306 predict_jump (REG_BR_PROB_BASE * 50 / 100);
36307 JUMP_LABEL (insn) = jump_label;
36309 emit_insn (gen_neg (res, res));
36311 emit_label (jump_label);
36312 LABEL_NUSES (jump_label) = 1;
36314 emit_move_insn (op0, res);
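/* Editor's note: a scalar C model (not GCC code) of the sequence above:
   round (a) = sgn (a) * floor (fabs (a) + 0.5).  The RTL version reads
   the sign from the fxam status word (the 0x02 test) rather than from a
   comparison.  */
#include <math.h>

static long
i387_round_model (double a)
{
  long res = (long) floor (fabs (a) + 0.5);	/* e2 = e1 + half; lfloor */
  return signbit (a) ? -res : res;		/* conditional gen_neg */
}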
36317 /* Output code to perform a Newton-Raphson approximation of a single precision
36318 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
36320 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
36322 rtx x0, x1, e0, e1;
36324 x0 = gen_reg_rtx (mode);
36325 e0 = gen_reg_rtx (mode);
36326 e1 = gen_reg_rtx (mode);
36327 x1 = gen_reg_rtx (mode);
36329 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
36331 b = force_reg (mode, b);
36333 /* x0 = rcp(b) estimate */
36334 emit_insn (gen_rtx_SET (VOIDmode, x0,
36335 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
36338 emit_insn (gen_rtx_SET (VOIDmode, e0,
36339 gen_rtx_MULT (mode, x0, b)));
36342 emit_insn (gen_rtx_SET (VOIDmode, e0,
36343 gen_rtx_MULT (mode, x0, e0)));
36346 emit_insn (gen_rtx_SET (VOIDmode, e1,
36347 gen_rtx_PLUS (mode, x0, x0)));
36350 emit_insn (gen_rtx_SET (VOIDmode, x1,
36351 gen_rtx_MINUS (mode, e1, e0)));
36354 emit_insn (gen_rtx_SET (VOIDmode, res,
36355 gen_rtx_MULT (mode, a, x1)));
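/* Editor's note: a C sketch (not GCC code) of the refinement above.
   Given the ~12-bit rcpss estimate x0, one Newton-Raphson step
   x1 = x0 * (2 - b * x0) -- written as (x0 + x0) - (b * x0 * x0) to
   match the RTL -- roughly doubles the number of correct bits.  */
static float
swdiv_model (float a, float b, float x0 /* hardware rcpss estimate */)
{
  float e0, e1, x1;

  e0 = x0 * b;			/* e0 = x0 * b */
  e0 = x0 * e0;			/* e0 = b * x0 * x0 */
  e1 = x0 + x0;			/* e1 = x0 + x0 */
  x1 = e1 - e0;			/* refined 1/b */
  return a * x1;		/* res = a * x1 */
}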
36358 /* Output code to perform a Newton-Raphson approximation of a
36359 single precision floating point [reciprocal] square root. */
36361 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
36364 rtx x0, e0, e1, e2, e3, mthree, mhalf;
36367 x0 = gen_reg_rtx (mode);
36368 e0 = gen_reg_rtx (mode);
36369 e1 = gen_reg_rtx (mode);
36370 e2 = gen_reg_rtx (mode);
36371 e3 = gen_reg_rtx (mode);
36373 real_from_integer (&r, VOIDmode, -3, -1, 0);
36374 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
36376 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
36377 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
36379 if (VECTOR_MODE_P (mode))
36381 mthree = ix86_build_const_vector (mode, true, mthree);
36382 mhalf = ix86_build_const_vector (mode, true, mhalf);
36385 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
36386 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
36388 a = force_reg (mode, a);
36390 /* x0 = rsqrt(a) estimate */
36391 emit_insn (gen_rtx_SET (VOIDmode, x0,
36392 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
36395 /* If a == 0.0, mask away the infinite rsqrt estimate to avoid NaN in sqrt (0.0). */
36400 zero = gen_reg_rtx (mode);
36401 mask = gen_reg_rtx (mode);
36403 zero = force_reg (mode, CONST0_RTX(mode));
36404 emit_insn (gen_rtx_SET (VOIDmode, mask,
36405 gen_rtx_NE (mode, zero, a)));
36407 emit_insn (gen_rtx_SET (VOIDmode, x0,
36408 gen_rtx_AND (mode, x0, mask)));
36412 emit_insn (gen_rtx_SET (VOIDmode, e0,
36413 gen_rtx_MULT (mode, x0, a)));
36415 emit_insn (gen_rtx_SET (VOIDmode, e1,
36416 gen_rtx_MULT (mode, e0, x0)));
36419 mthree = force_reg (mode, mthree);
36420 emit_insn (gen_rtx_SET (VOIDmode, e2,
36421 gen_rtx_PLUS (mode, e1, mthree)));
36423 mhalf = force_reg (mode, mhalf);
36425 /* e3 = -.5 * x0 */
36426 emit_insn (gen_rtx_SET (VOIDmode, e3,
36427 gen_rtx_MULT (mode, x0, mhalf)));
36429 /* e3 = -.5 * e0 */
36430 emit_insn (gen_rtx_SET (VOIDmode, e3,
36431 gen_rtx_MULT (mode, e0, mhalf)));
36432 /* ret = e2 * e3 */
36433 emit_insn (gen_rtx_SET (VOIDmode, res,
36434 gen_rtx_MULT (mode, e2, e3)));
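/* Editor's note: a C sketch (not GCC code) of the refinement above.
   With the rsqrtss estimate x0, e2 = a*x0*x0 - 3 and the -0.5 factor
   give one Newton-Raphson step x1 = 0.5 * x0 * (3 - a*x0*x0); for sqrt
   the step is further multiplied by a, since a * rsqrt(a) = sqrt(a).  */
static float
swsqrt_model (float a, float x0 /* hardware rsqrtss estimate */, int recip)
{
  float e0, e1, e2, e3;

  e0 = x0 * a;			/* e0 = x0 * a */
  e1 = e0 * x0;			/* e1 = a * x0 * x0 */
  e2 = e1 - 3.0f;		/* e2 = e1 + mthree */
  e3 = (recip ? x0 : e0) * -0.5f;
  return e2 * e3;		/* res = e2 * e3 */
}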
36437 #ifdef TARGET_SOLARIS
36438 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
36441 i386_solaris_elf_named_section (const char *name, unsigned int flags,
36444 /* With Binutils 2.15, the "@unwind" marker must be specified on
36445 every occurrence of the ".eh_frame" section, not just the first
36448 && strcmp (name, ".eh_frame") == 0)
36450 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
36451 flags & SECTION_WRITE ? "aw" : "a");
36456 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
36458 solaris_elf_asm_comdat_section (name, flags, decl);
36463 default_elf_asm_named_section (name, flags, decl);
36465 #endif /* TARGET_SOLARIS */
36467 /* Return the mangling of TYPE if it is an extended fundamental type. */
36469 static const char *
36470 ix86_mangle_type (const_tree type)
36472 type = TYPE_MAIN_VARIANT (type);
36474 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
36475 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
36478 switch (TYPE_MODE (type))
36481 /* __float128 is "g". */
36484 /* "long double" or __float80 is "e". */
36491 /* For 32-bit code we can save PIC register setup by using
36492 __stack_chk_fail_local hidden function instead of calling
36493 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
36494 register, so it is better to call __stack_chk_fail directly. */
36496 static tree ATTRIBUTE_UNUSED
36497 ix86_stack_protect_fail (void)
36499 return TARGET_64BIT
36500 ? default_external_stack_protect_fail ()
36501 : default_hidden_stack_protect_fail ();
36504 /* Select a format to encode pointers in exception handling data. CODE
36505 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
36506 true if the symbol may be affected by dynamic relocations.
36508 ??? All x86 object file formats are capable of representing this.
36509 After all, the relocation needed is the same as for the call insn.
36510 Whether or not a particular assembler allows us to enter such, I
36511 guess we'll have to see. */
36513 asm_preferred_eh_data_format (int code, int global)
36517 int type = DW_EH_PE_sdata8;
36519 || ix86_cmodel == CM_SMALL_PIC
36520 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
36521 type = DW_EH_PE_sdata4;
36522 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
36524 if (ix86_cmodel == CM_SMALL
36525 || (ix86_cmodel == CM_MEDIUM && code))
36526 return DW_EH_PE_udata4;
36527 return DW_EH_PE_absptr;
36530 /* Expand copysign from SIGN to the positive value ABS_VALUE
36531 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
36534 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
36536 enum machine_mode mode = GET_MODE (sign);
36537 rtx sgn = gen_reg_rtx (mode);
36538 if (mask == NULL_RTX)
36540 enum machine_mode vmode;
36542 if (mode == SFmode)
36544 else if (mode == DFmode)
36549 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
36550 if (!VECTOR_MODE_P (mode))
36552 /* We need to generate a scalar mode mask in this case. */
36553 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
36554 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
36555 mask = gen_reg_rtx (mode);
36556 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
36560 mask = gen_rtx_NOT (mode, mask);
36561 emit_insn (gen_rtx_SET (VOIDmode, sgn,
36562 gen_rtx_AND (mode, mask, sign)));
36563 emit_insn (gen_rtx_SET (VOIDmode, result,
36564 gen_rtx_IOR (mode, abs_value, sgn)));
36567 /* Expand fabs (OP0) and return a new rtx that holds the result. The
36568 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
36571 ix86_expand_sse_fabs (rtx op0, rtx *smask)
36573 enum machine_mode vmode, mode = GET_MODE (op0);
36576 xa = gen_reg_rtx (mode);
36577 if (mode == SFmode)
36579 else if (mode == DFmode)
36583 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
36584 if (!VECTOR_MODE_P (mode))
36586 /* We need to generate a scalar mode mask in this case. */
36587 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
36588 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
36589 mask = gen_reg_rtx (mode);
36590 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
36592 emit_insn (gen_rtx_SET (VOIDmode, xa,
36593 gen_rtx_AND (mode, op0, mask)));
36601 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
36602 swapping the operands if SWAP_OPERANDS is true. The expanded
36603 code is a forward jump to a newly created label in case the
36604 comparison is true. The generated label rtx is returned. */
36606 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
36607 bool swap_operands)
36618 label = gen_label_rtx ();
36619 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
36620 emit_insn (gen_rtx_SET (VOIDmode, tmp,
36621 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
36622 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
36623 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
36624 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
36625 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
36626 JUMP_LABEL (tmp) = label;
36631 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
36632 using comparison code CODE. Operands are swapped for the comparison if
36633 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
36635 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
36636 bool swap_operands)
36638 rtx (*insn)(rtx, rtx, rtx, rtx);
36639 enum machine_mode mode = GET_MODE (op0);
36640 rtx mask = gen_reg_rtx (mode);
36649 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
36651 emit_insn (insn (mask, op0, op1,
36652 gen_rtx_fmt_ee (code, mode, op0, op1)));
36656 /* Generate and return a rtx of mode MODE for 2**n where n is the number
36657 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
36659 ix86_gen_TWO52 (enum machine_mode mode)
36661 REAL_VALUE_TYPE TWO52r;
36664 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
36665 TWO52 = const_double_from_real_value (TWO52r, mode);
36666 TWO52 = force_reg (mode, TWO52);
36671 /* Expand SSE sequence for computing lround from OP1 storing
36674 ix86_expand_lround (rtx op0, rtx op1)
36676 /* C code for the stuff we're doing below:
36677 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
36680 enum machine_mode mode = GET_MODE (op1);
36681 const struct real_format *fmt;
36682 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
36685 /* load nextafter (0.5, 0.0) */
36686 fmt = REAL_MODE_FORMAT (mode);
36687 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
36688 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
36690 /* adj = copysign (0.5, op1) */
36691 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
36692 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
36694 /* adj = op1 + adj */
36695 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
36697 /* op0 = (imode)adj */
36698 expand_fix (op0, adj, 0);
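/* Editor's note: a C sketch (not GCC code) of the expansion above.
   Adding copysign (nextafter (0.5, 0.0), x) and truncating gives
   round-half-away-from-zero; using the predecessor of 0.5 keeps inputs
   just below one half from being rounded up by the addition itself.  */
#include <math.h>

static long
lround_model (double x)
{
  double pred_half = nextafter (0.5, 0.0);	/* 0.5 - 2^(-p-1) */
  double adj = copysign (pred_half, x);		/* the copysign step */
  return (long) (x + adj);			/* expand_fix truncates */
}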
36701 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0. */
36704 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
36706 /* C code for the stuff we're doing below (for do_floor):
36708 xi -= (double)xi > op1 ? 1 : 0;
36711 enum machine_mode fmode = GET_MODE (op1);
36712 enum machine_mode imode = GET_MODE (op0);
36713 rtx ireg, freg, label, tmp;
36715 /* reg = (long)op1 */
36716 ireg = gen_reg_rtx (imode);
36717 expand_fix (ireg, op1, 0);
36719 /* freg = (double)reg */
36720 freg = gen_reg_rtx (fmode);
36721 expand_float (freg, ireg, 0);
36723 /* ireg = (freg > op1) ? ireg - 1 : ireg */
36724 label = ix86_expand_sse_compare_and_jump (UNLE,
36725 freg, op1, !do_floor);
36726 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
36727 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
36728 emit_move_insn (ireg, tmp);
36730 emit_label (label);
36731 LABEL_NUSES (label) = 1;
36733 emit_move_insn (op0, ireg);
36736 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
36737 result in OPERAND0. */
36739 ix86_expand_rint (rtx operand0, rtx operand1)
36741 /* C code for the stuff we're doing below:
36742 xa = fabs (operand1);
36743 if (!isless (xa, 2**52))
36745 xa = xa + 2**52 - 2**52;
36746 return copysign (xa, operand1);
36748 enum machine_mode mode = GET_MODE (operand0);
36749 rtx res, xa, label, TWO52, mask;
36751 res = gen_reg_rtx (mode);
36752 emit_move_insn (res, operand1);
36754 /* xa = abs (operand1) */
36755 xa = ix86_expand_sse_fabs (res, &mask);
36757 /* if (!isless (xa, TWO52)) goto label; */
36758 TWO52 = ix86_gen_TWO52 (mode);
36759 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
36761 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
36762 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
36764 ix86_sse_copysign_to_positive (res, xa, res, mask);
36766 emit_label (label);
36767 LABEL_NUSES (label) = 1;
36769 emit_move_insn (operand0, res);
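/* Editor's note: a C sketch (not GCC code) of the TWO52 trick above,
   assuming round-to-nearest and no fast-math reassociation (the emitted
   RTL is immune to both concerns).  Every double >= 2^52 is integral,
   so adding and subtracting 2^52 rounds |x| to an integer in the FPU's
   current rounding mode; copysign restores the sign and -0.0.  */
#include <math.h>

static double
rint_model (double x)
{
  const double two52 = 0x1.0p52;
  double xa = fabs (x);

  if (!(xa < two52))		/* !isless: huge or NaN, already integral */
    return x;
  xa = (xa + two52) - two52;
  return copysign (xa, x);
}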
36772 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
36775 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
36777 /* C code for the stuff we expand below.
36778 double xa = fabs (x), x2;
36779 if (!isless (xa, TWO52))
36781 xa = xa + TWO52 - TWO52;
36782 x2 = copysign (xa, x);
36791 enum machine_mode mode = GET_MODE (operand0);
36792 rtx xa, TWO52, tmp, label, one, res, mask;
36794 TWO52 = ix86_gen_TWO52 (mode);
36796 /* Temporary for holding the result, initialized to the input
36797 operand to ease control flow. */
36798 res = gen_reg_rtx (mode);
36799 emit_move_insn (res, operand1);
36801 /* xa = abs (operand1) */
36802 xa = ix86_expand_sse_fabs (res, &mask);
36804 /* if (!isless (xa, TWO52)) goto label; */
36805 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
36807 /* xa = xa + TWO52 - TWO52; */
36808 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
36809 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
36811 /* xa = copysign (xa, operand1) */
36812 ix86_sse_copysign_to_positive (xa, xa, res, mask);
36814 /* generate 1.0 or -1.0 */
36815 one = force_reg (mode,
36816 const_double_from_real_value (do_floor
36817 ? dconst1 : dconstm1, mode));
36819 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
36820 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
36821 emit_insn (gen_rtx_SET (VOIDmode, tmp,
36822 gen_rtx_AND (mode, one, tmp)));
36823 /* We always need to subtract here to preserve signed zero. */
36824 tmp = expand_simple_binop (mode, MINUS,
36825 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
36826 emit_move_insn (res, tmp);
36828 emit_label (label);
36829 LABEL_NUSES (label) = 1;
36831 emit_move_insn (operand0, res);
36834 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
36837 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
36839 /* C code for the stuff we expand below.
36840 double xa = fabs (x), x2;
36841 if (!isless (xa, TWO52))
36843 x2 = (double)(long)x;
36850 if (HONOR_SIGNED_ZEROS (mode))
36851 return copysign (x2, x);
36854 enum machine_mode mode = GET_MODE (operand0);
36855 rtx xa, xi, TWO52, tmp, label, one, res, mask;
36857 TWO52 = ix86_gen_TWO52 (mode);
36859 /* Temporary for holding the result, initialized to the input
36860 operand to ease control flow. */
36861 res = gen_reg_rtx (mode);
36862 emit_move_insn (res, operand1);
36864 /* xa = abs (operand1) */
36865 xa = ix86_expand_sse_fabs (res, &mask);
36867 /* if (!isless (xa, TWO52)) goto label; */
36868 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
36870 /* xa = (double)(long)x */
36871 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
36872 expand_fix (xi, res, 0);
36873 expand_float (xa, xi, 0);
36876 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
36878 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
36879 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
36880 emit_insn (gen_rtx_SET (VOIDmode, tmp,
36881 gen_rtx_AND (mode, one, tmp)));
36882 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
36883 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
36884 emit_move_insn (res, tmp);
36886 if (HONOR_SIGNED_ZEROS (mode))
36887 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
36889 emit_label (label);
36890 LABEL_NUSES (label) = 1;
36892 emit_move_insn (operand0, res);
36895 /* Expand SSE sequence for computing round from OPERAND1 storing
36896 into OPERAND0. Sequence that works without relying on DImode truncation
36897 via cvttsd2siq, which is only available on 64-bit targets. */
36899 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
36901 /* C code for the stuff we expand below.
36902 double xa = fabs (x), xa2, x2;
36903 if (!isless (xa, TWO52))
36905 Using the absolute value and copying back sign makes
36906 -0.0 -> -0.0 correct.
36907 xa2 = xa + TWO52 - TWO52;
36912 else if (dxa > 0.5)
36914 x2 = copysign (xa2, x);
36917 enum machine_mode mode = GET_MODE (operand0);
36918 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
36920 TWO52 = ix86_gen_TWO52 (mode);
36922 /* Temporary for holding the result, initialized to the input
36923 operand to ease control flow. */
36924 res = gen_reg_rtx (mode);
36925 emit_move_insn (res, operand1);
36927 /* xa = abs (operand1) */
36928 xa = ix86_expand_sse_fabs (res, &mask);
36930 /* if (!isless (xa, TWO52)) goto label; */
36931 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
36933 /* xa2 = xa + TWO52 - TWO52; */
36934 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
36935 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
36937 /* dxa = xa2 - xa; */
36938 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
36940 /* generate 0.5, 1.0 and -0.5 */
36941 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
36942 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
36943 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
36947 tmp = gen_reg_rtx (mode);
36948 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
36949 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
36950 emit_insn (gen_rtx_SET (VOIDmode, tmp,
36951 gen_rtx_AND (mode, one, tmp)));
36952 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
36953 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
36954 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
36955 emit_insn (gen_rtx_SET (VOIDmode, tmp,
36956 gen_rtx_AND (mode, one, tmp)));
36957 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
36959 /* res = copysign (xa2, operand1) */
36960 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
36962 emit_label (label);
36963 LABEL_NUSES (label) = 1;
36965 emit_move_insn (operand0, res);
36968 /* Expand SSE sequence for computing trunc from OPERAND1 storing
36971 ix86_expand_trunc (rtx operand0, rtx operand1)
36973 /* C code for SSE variant we expand below.
36974 double xa = fabs (x), x2;
36975 if (!isless (xa, TWO52))
36977 x2 = (double)(long)x;
36978 if (HONOR_SIGNED_ZEROS (mode))
36979 return copysign (x2, x);
36982 enum machine_mode mode = GET_MODE (operand0);
36983 rtx xa, xi, TWO52, label, res, mask;
36985 TWO52 = ix86_gen_TWO52 (mode);
36987 /* Temporary for holding the result, initialized to the input
36988 operand to ease control flow. */
36989 res = gen_reg_rtx (mode);
36990 emit_move_insn (res, operand1);
36992 /* xa = abs (operand1) */
36993 xa = ix86_expand_sse_fabs (res, &mask);
36995 /* if (!isless (xa, TWO52)) goto label; */
36996 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
36998 /* x = (double)(long)x */
36999 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
37000 expand_fix (xi, res, 0);
37001 expand_float (res, xi, 0);
37003 if (HONOR_SIGNED_ZEROS (mode))
37004 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
37006 emit_label (label);
37007 LABEL_NUSES (label) = 1;
37009 emit_move_insn (operand0, res);
37012 /* Expand SSE sequence for computing trunc from OPERAND1 storing
37015 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
37017 enum machine_mode mode = GET_MODE (operand0);
37018 rtx xa, mask, TWO52, label, one, res, smask, tmp;
37020 /* C code for SSE variant we expand below.
37021 double xa = fabs (x), x2;
37022 if (!isless (xa, TWO52))
37024 xa2 = xa + TWO52 - TWO52;
37028 x2 = copysign (xa2, x);
37032 TWO52 = ix86_gen_TWO52 (mode);
37034 /* Temporary for holding the result, initialized to the input
37035 operand to ease control flow. */
37036 res = gen_reg_rtx (mode);
37037 emit_move_insn (res, operand1);
37039 /* xa = abs (operand1) */
37040 xa = ix86_expand_sse_fabs (res, &smask);
37042 /* if (!isless (xa, TWO52)) goto label; */
37043 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
37045 /* res = xa + TWO52 - TWO52; */
37046 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
37047 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
37048 emit_move_insn (res, tmp);
37051 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
37053 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
37054 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
37055 emit_insn (gen_rtx_SET (VOIDmode, mask,
37056 gen_rtx_AND (mode, mask, one)));
37057 tmp = expand_simple_binop (mode, MINUS,
37058 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
37059 emit_move_insn (res, tmp);
37061 /* res = copysign (res, operand1) */
37062 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
37064 emit_label (label);
37065 LABEL_NUSES (label) = 1;
37067 emit_move_insn (operand0, res);
37070 /* Expand SSE sequence for computing round from OPERAND1 storing
37073 ix86_expand_round (rtx operand0, rtx operand1)
37075 /* C code for the stuff we're doing below:
37076 double xa = fabs (x);
37077 if (!isless (xa, TWO52))
37079 xa = (double)(long)(xa + nextafter (0.5, 0.0));
37080 return copysign (xa, x);
37082 enum machine_mode mode = GET_MODE (operand0);
37083 rtx res, TWO52, xa, label, xi, half, mask;
37084 const struct real_format *fmt;
37085 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
37087 /* Temporary for holding the result, initialized to the input
37088 operand to ease control flow. */
37089 res = gen_reg_rtx (mode);
37090 emit_move_insn (res, operand1);
37092 TWO52 = ix86_gen_TWO52 (mode);
37093 xa = ix86_expand_sse_fabs (res, &mask);
37094 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
37096 /* load nextafter (0.5, 0.0) */
37097 fmt = REAL_MODE_FORMAT (mode);
37098 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
37099 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
37101 /* xa = xa + 0.5 */
37102 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
37103 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
37105 /* xa = (double)(int64_t)xa */
37106 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
37107 expand_fix (xi, xa, 0);
37108 expand_float (xa, xi, 0);
37110 /* res = copysign (xa, operand1) */
37111 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
37113 emit_label (label);
37114 LABEL_NUSES (label) = 1;
37116 emit_move_insn (operand0, res);
37119 /* Expand SSE sequence for computing round
37120 from OP1 storing into OP0 using sse4 round insn. */
37122 ix86_expand_round_sse4 (rtx op0, rtx op1)
37124 enum machine_mode mode = GET_MODE (op0);
37125 rtx e1, e2, res, half;
37126 const struct real_format *fmt;
37127 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
37128 rtx (*gen_copysign) (rtx, rtx, rtx);
37129 rtx (*gen_round) (rtx, rtx, rtx);
37134 gen_copysign = gen_copysignsf3;
37135 gen_round = gen_sse4_1_roundsf2;
37138 gen_copysign = gen_copysigndf3;
37139 gen_round = gen_sse4_1_rounddf2;
37142 gcc_unreachable ();
37145 /* round (a) = trunc (a + copysign (0.5, a)) */
37147 /* load nextafter (0.5, 0.0) */
37148 fmt = REAL_MODE_FORMAT (mode);
37149 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
37150 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
37151 half = const_double_from_real_value (pred_half, mode);
37153 /* e1 = copysign (0.5, op1) */
37154 e1 = gen_reg_rtx (mode);
37155 emit_insn (gen_copysign (e1, half, op1));
37157 /* e2 = op1 + e1 */
37158 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
37160 /* res = trunc (e2) */
37161 res = gen_reg_rtx (mode);
37162 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
37164 emit_move_insn (op0, res);
37168 /* Table of valid machine attributes. */
37169 static const struct attribute_spec ix86_attribute_table[] =
37171 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
37172 affects_type_identity } */
37173 /* Stdcall attribute says callee is responsible for popping arguments
37174 if they are not variable. */
37175 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
37177 /* Fastcall attribute says callee is responsible for popping arguments
37178 if they are not variable. */
37179 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
37181 /* Thiscall attribute says callee is responsible for popping arguments
37182 if they are not variable. */
37183 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
37185 /* Cdecl attribute says the callee is a normal C declaration */
37186 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
37188 /* Regparm attribute specifies how many integer arguments are to be
37189 passed in registers. */
37190 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
37192 /* Sseregparm attribute says we are using x86_64 calling conventions
37193 for FP arguments. */
37194 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
37196 /* The transactional memory builtins are implicitly regparm or fastcall
37197 depending on the ABI. Override the generic do-nothing attribute that
37198 these builtins were declared with. */
37199 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
37201 /* force_align_arg_pointer says this function realigns the stack at entry. */
37202 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
37203 false, true, true, ix86_handle_cconv_attribute, false },
37204 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
37205 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
37206 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
37207 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
37210 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
37212 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
37214 #ifdef SUBTARGET_ATTRIBUTE_TABLE
37215 SUBTARGET_ATTRIBUTE_TABLE,
37217 /* ms_abi and sysv_abi calling convention function attributes. */
37218 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
37219 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
37220 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
37222 { "callee_pop_aggregate_return", 1, 1, false, true, true,
37223 ix86_handle_callee_pop_aggregate_return, true },
37225 { NULL, 0, 0, false, false, false, NULL, false }
37228 /* Implement targetm.vectorize.builtin_vectorization_cost. */
37230 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
37232 int misalign ATTRIBUTE_UNUSED)
37236 switch (type_of_cost)
37239 return ix86_cost->scalar_stmt_cost;
37242 return ix86_cost->scalar_load_cost;
37245 return ix86_cost->scalar_store_cost;
37248 return ix86_cost->vec_stmt_cost;
37251 return ix86_cost->vec_align_load_cost;
37254 return ix86_cost->vec_store_cost;
37256 case vec_to_scalar:
37257 return ix86_cost->vec_to_scalar_cost;
37259 case scalar_to_vec:
37260 return ix86_cost->scalar_to_vec_cost;
37262 case unaligned_load:
37263 case unaligned_store:
37264 return ix86_cost->vec_unalign_load_cost;
37266 case cond_branch_taken:
37267 return ix86_cost->cond_taken_branch_cost;
37269 case cond_branch_not_taken:
37270 return ix86_cost->cond_not_taken_branch_cost;
37273 case vec_promote_demote:
37274 return ix86_cost->vec_stmt_cost;
37276 case vec_construct:
37277 elements = TYPE_VECTOR_SUBPARTS (vectype);
37278 return elements / 2 + 1;
37281 gcc_unreachable ();
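/* Editor's note: e.g. constructing an 8-element vector from scalars is
   costed as 8 / 2 + 1 = 5 vector statements, reflecting the pairwise
   interleave strategy of ix86_expand_vector_init_interleave rather than
   one insertion per element.  */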
37285 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
37286 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
37287 insn every time. */
37289 static GTY(()) rtx vselect_insn;
37291 /* Initialize vselect_insn. */
37294 init_vselect_insn (void)
37299 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
37300 for (i = 0; i < MAX_VECT_LEN; ++i)
37301 XVECEXP (x, 0, i) = const0_rtx;
37302 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
37304 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
37306 vselect_insn = emit_insn (x);
37310 /* Construct (set target (vec_select op0 (parallel perm))) and
37311 return true if that's a valid instruction in the active ISA. */
37314 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
37315 unsigned nelt, bool testing_p)
37318 rtx x, save_vconcat;
37321 if (vselect_insn == NULL_RTX)
37322 init_vselect_insn ();
37324 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
37325 PUT_NUM_ELEM (XVEC (x, 0), nelt);
37326 for (i = 0; i < nelt; ++i)
37327 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
37328 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
37329 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
37330 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
37331 SET_DEST (PATTERN (vselect_insn)) = target;
37332 icode = recog_memoized (vselect_insn);
37334 if (icode >= 0 && !testing_p)
37335 emit_insn (copy_rtx (PATTERN (vselect_insn)));
37337 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
37338 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
37339 INSN_CODE (vselect_insn) = -1;
37344 /* Similar, but generate a vec_concat from op0 and op1 as well. */
37347 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
37348 const unsigned char *perm, unsigned nelt,
37351 enum machine_mode v2mode;
37355 if (vselect_insn == NULL_RTX)
37356 init_vselect_insn ();
37358 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
37359 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
37360 PUT_MODE (x, v2mode);
37363 ok = expand_vselect (target, x, perm, nelt, testing_p);
37364 XEXP (x, 0) = const0_rtx;
37365 XEXP (x, 1) = const0_rtx;
37369 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
37370 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
37373 expand_vec_perm_blend (struct expand_vec_perm_d *d)
37375 enum machine_mode vmode = d->vmode;
37376 unsigned i, mask, nelt = d->nelt;
37377 rtx target, op0, op1, x;
37378 rtx rperm[32], vperm;
37380 if (d->one_operand_p)
37382 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
37384 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
37386 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
37391 /* This is a blend, not a permute. Elements must stay in their
37392 respective lanes. */
37393 for (i = 0; i < nelt; ++i)
37395 unsigned e = d->perm[i];
37396 if (!(e == i || e == i + nelt))
37403 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
37404 decision should be extracted elsewhere, so that we only try that
37405 sequence once all budget==3 options have been tried. */
37406 target = d->target;
37419 for (i = 0; i < nelt; ++i)
37420 mask |= (d->perm[i] >= nelt) << i;
37424 for (i = 0; i < 2; ++i)
37425 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
37430 for (i = 0; i < 4; ++i)
37431 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
37436 /* See if bytes move in pairs so we can use pblendw with
37437 an immediate argument, rather than pblendvb with a vector
37439 for (i = 0; i < 16; i += 2)
37440 if (d->perm[i] + 1 != d->perm[i + 1])
37443 for (i = 0; i < nelt; ++i)
37444 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
37447 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
37448 vperm = force_reg (vmode, vperm);
37450 if (GET_MODE_SIZE (vmode) == 16)
37451 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
37453 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
37457 for (i = 0; i < 8; ++i)
37458 mask |= (d->perm[i * 2] >= 16) << i;
37463 target = gen_lowpart (vmode, target);
37464 op0 = gen_lowpart (vmode, op0);
37465 op1 = gen_lowpart (vmode, op1);
37469 /* See if bytes move in pairs. If not, vpblendvb must be used. */
37470 for (i = 0; i < 32; i += 2)
37471 if (d->perm[i] + 1 != d->perm[i + 1])
37473 /* See if bytes move in quadruplets. If yes, vpblendd
37474 with immediate can be used. */
37475 for (i = 0; i < 32; i += 4)
37476 if (d->perm[i] + 2 != d->perm[i + 2])
37480 /* See if bytes move the same in both lanes. If yes,
37481 vpblendw with immediate can be used. */
37482 for (i = 0; i < 16; i += 2)
37483 if (d->perm[i] + 16 != d->perm[i + 16])
37486 /* Use vpblendw. */
37487 for (i = 0; i < 16; ++i)
37488 mask |= (d->perm[i * 2] >= 32) << i;
37493 /* Use vpblendd. */
37494 for (i = 0; i < 8; ++i)
37495 mask |= (d->perm[i * 4] >= 32) << i;
37500 /* See if words move in pairs. If yes, vpblendd can be used. */
37501 for (i = 0; i < 16; i += 2)
37502 if (d->perm[i] + 1 != d->perm[i + 1])
37506 /* See if words move the same in both lanes. If not,
37507 vpblendvb must be used. */
37508 for (i = 0; i < 8; i++)
37509 if (d->perm[i] + 8 != d->perm[i + 8])
37511 /* Use vpblendvb. */
37512 for (i = 0; i < 32; ++i)
37513 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
37517 target = gen_lowpart (vmode, target);
37518 op0 = gen_lowpart (vmode, op0);
37519 op1 = gen_lowpart (vmode, op1);
37520 goto finish_pblendvb;
37523 /* Use vpblendw. */
37524 for (i = 0; i < 16; ++i)
37525 mask |= (d->perm[i] >= 16) << i;
37529 /* Use vpblendd. */
37530 for (i = 0; i < 8; ++i)
37531 mask |= (d->perm[i * 2] >= 16) << i;
37536 /* Use vpblendd. */
37537 for (i = 0; i < 4; ++i)
37538 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
37543 gcc_unreachable ();
37546 /* This matches five different patterns, one for each supported mode. */
37547 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
37548 x = gen_rtx_SET (VOIDmode, target, x);
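/* Editor's note: a C sketch (not GCC code) of the immediate-mask
   construction used for the blends above: bit I of the mask selects
   OP1 for element I exactly when the permutation draws that element
   from the second operand (index >= nelt).  */
static unsigned int
pblend_mask_model (const unsigned char *perm, unsigned int nelt)
{
  unsigned int mask = 0;
  unsigned int i;

  for (i = 0; i < nelt; ++i)
    mask |= (unsigned int) (perm[i] >= nelt) << i;
  return mask;
}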
37554 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
37555 in terms of the variable form of vpermilps.
37557 Note that we will have already failed the immediate input vpermilps,
37558 which requires that the high and low part shuffle be identical; the
37559 variable form doesn't require that. */
37562 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
37564 rtx rperm[8], vperm;
37567 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
37570 /* We can only permute within the 128-bit lane. */
37571 for (i = 0; i < 8; ++i)
37573 unsigned e = d->perm[i];
37574 if (i < 4 ? e >= 4 : e < 4)
37581 for (i = 0; i < 8; ++i)
37583 unsigned e = d->perm[i];
37585 /* Within each 128-bit lane, the elements of op0 are numbered
37586 from 0 and the elements of op1 are numbered from 4. */
37592 rperm[i] = GEN_INT (e);
37595 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
37596 vperm = force_reg (V8SImode, vperm);
37597 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
37602 /* Return true if permutation D can be performed as VMODE permutation
37606 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
37608 unsigned int i, j, chunk;
37610 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
37611 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
37612 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
37615 if (GET_MODE_NUNITS (vmode) >= d->nelt)
37618 chunk = d->nelt / GET_MODE_NUNITS (vmode);
37619 for (i = 0; i < d->nelt; i += chunk)
37620 if (d->perm[i] & (chunk - 1))
37623 for (j = 1; j < chunk; ++j)
37624 if (d->perm[i] + j != d->perm[i + j])
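/* Editor's note: a C sketch (not GCC code) of the test above.  A
   permutation of narrow elements can be carried out in a wider element
   mode iff every chunk of CHUNK consecutive output elements starts on
   a chunk-aligned input index and stays consecutive.  */
#include <stdbool.h>

static bool
valid_perm_wider_model (const unsigned char *perm, unsigned int nelt,
			unsigned int chunk)
{
  unsigned int i, j;

  for (i = 0; i < nelt; i += chunk)
    {
      if (perm[i] & (chunk - 1))	/* must start chunk-aligned */
	return false;
      for (j = 1; j < chunk; ++j)
	if (perm[i] + j != perm[i + j])	/* and remain consecutive */
	  return false;
    }
  return true;
}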
37630 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
37631 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
37634 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
37636 unsigned i, nelt, eltsz, mask;
37637 unsigned char perm[32];
37638 enum machine_mode vmode = V16QImode;
37639 rtx rperm[32], vperm, target, op0, op1;
37643 if (!d->one_operand_p)
37645 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
37648 && valid_perm_using_mode_p (V2TImode, d))
37653 /* Use vperm2i128 insn. The pattern uses
37654 V4DImode instead of V2TImode. */
37655 target = gen_lowpart (V4DImode, d->target);
37656 op0 = gen_lowpart (V4DImode, d->op0);
37657 op1 = gen_lowpart (V4DImode, d->op1);
37659 = GEN_INT ((d->perm[0] / (nelt / 2))
37660 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
37661 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
37669 if (GET_MODE_SIZE (d->vmode) == 16)
37674 else if (GET_MODE_SIZE (d->vmode) == 32)
37679 /* V4DImode should already have been handled through
37680 expand_vselect by the vpermq instruction. */
37681 gcc_assert (d->vmode != V4DImode);
37684 if (d->vmode == V8SImode
37685 || d->vmode == V16HImode
37686 || d->vmode == V32QImode)
37688 /* First see if vpermq can be used for
37689 V8SImode/V16HImode/V32QImode. */
37690 if (valid_perm_using_mode_p (V4DImode, d))
37692 for (i = 0; i < 4; i++)
37693 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
37696 return expand_vselect (gen_lowpart (V4DImode, d->target),
37697 gen_lowpart (V4DImode, d->op0),
37701 /* Next see if vpermd can be used. */
37702 if (valid_perm_using_mode_p (V8SImode, d))
37705 /* Or if vpermps can be used. */
37706 else if (d->vmode == V8SFmode)
37709 if (vmode == V32QImode)
37711 /* vpshufb only works within 128-bit lanes; it is not
37712 possible to shuffle bytes between the lanes. */
37713 for (i = 0; i < nelt; ++i)
37714 if ((d->perm[i] ^ i) & (nelt / 2))
37725 if (vmode == V8SImode)
37726 for (i = 0; i < 8; ++i)
37727 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
37730 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
37731 if (!d->one_operand_p)
37732 mask = 2 * nelt - 1;
37733 else if (vmode == V16QImode)
37736 mask = nelt / 2 - 1;
37738 for (i = 0; i < nelt; ++i)
37740 unsigned j, e = d->perm[i] & mask;
37741 for (j = 0; j < eltsz; ++j)
37742 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
37746 vperm = gen_rtx_CONST_VECTOR (vmode,
37747 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
37748 vperm = force_reg (vmode, vperm);
37750 target = gen_lowpart (vmode, d->target);
37751 op0 = gen_lowpart (vmode, d->op0);
37752 if (d->one_operand_p)
37754 if (vmode == V16QImode)
37755 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
37756 else if (vmode == V32QImode)
37757 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
37758 else if (vmode == V8SFmode)
37759 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
37761 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
37765 op1 = gen_lowpart (vmode, d->op1);
37766 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
37772 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
37773 in a single instruction. */
37776 expand_vec_perm_1 (struct expand_vec_perm_d *d)
37778 unsigned i, nelt = d->nelt;
37779 unsigned char perm2[MAX_VECT_LEN];
37781 /* Check plain VEC_SELECT first, because AVX has instructions that could
37782 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
37783 input where SEL+CONCAT may not. */
37784 if (d->one_operand_p)
37786 int mask = nelt - 1;
37787 bool identity_perm = true;
37788 bool broadcast_perm = true;
37790 for (i = 0; i < nelt; i++)
37792 perm2[i] = d->perm[i] & mask;
37794 identity_perm = false;
37796 broadcast_perm = false;
37802 emit_move_insn (d->target, d->op0);
37805 else if (broadcast_perm && TARGET_AVX2)
37807 /* Use vpbroadcast{b,w,d}. */
37808 rtx (*gen) (rtx, rtx) = NULL;
37812 gen = gen_avx2_pbroadcastv32qi_1;
37815 gen = gen_avx2_pbroadcastv16hi_1;
37818 gen = gen_avx2_pbroadcastv8si_1;
37821 gen = gen_avx2_pbroadcastv16qi;
37824 gen = gen_avx2_pbroadcastv8hi;
37827 gen = gen_avx2_vec_dupv8sf_1;
37829 /* For other modes, prefer the other shuffles this function creates. */
37835 emit_insn (gen (d->target, d->op0));
37840 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
37843 /* There are plenty of patterns in sse.md that are written for
37844 SEL+CONCAT and are not replicated for a single op. Perhaps
37845 that should be changed, to avoid the nastiness here. */
37847 /* Recognize interleave style patterns, which means incrementing
37848 every other permutation operand. */
37849 for (i = 0; i < nelt; i += 2)
37851 perm2[i] = d->perm[i] & mask;
37852 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
37854 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
37858 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
37861 for (i = 0; i < nelt; i += 4)
37863 perm2[i + 0] = d->perm[i + 0] & mask;
37864 perm2[i + 1] = d->perm[i + 1] & mask;
37865 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
37866 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
37869 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
37875 /* Finally, try the fully general two operand permute. */
37876 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
37880 /* Recognize interleave style patterns with reversed operands. */
37881 if (!d->one_operand_p)
37883 for (i = 0; i < nelt; ++i)
37885 unsigned e = d->perm[i];
37893 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
37898 /* Try the SSE4.1 blend variable merge instructions. */
37899 if (expand_vec_perm_blend (d))
37902 /* Try one of the AVX vpermil variable permutations. */
37903 if (expand_vec_perm_vpermil (d))
37906 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
37907 vpshufb, vpermd, vpermps or vpermq variable permutation. */
37908 if (expand_vec_perm_pshufb (d))
37914 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
37915 in terms of a pair of pshuflw + pshufhw instructions. */
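/* E.g. the V8HImode permutation { 2 0 3 1 5 7 4 6 } qualifies: pshuflw
   reorders the low quadword to { 2 0 3 1 } while copying the high one,
   and pshufhw then reorders the high quadword to { 5 7 4 6 } while
   keeping the low one intact.  */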
37918 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
37920 unsigned char perm2[MAX_VECT_LEN];
37924 if (d->vmode != V8HImode || !d->one_operand_p)
37927 /* The two permutations only operate in 64-bit lanes. */
37928 for (i = 0; i < 4; ++i)
37929 if (d->perm[i] >= 4)
37931 for (i = 4; i < 8; ++i)
37932 if (d->perm[i] < 4)
37938 /* Emit the pshuflw. */
37939 memcpy (perm2, d->perm, 4);
37940 for (i = 4; i < 8; ++i)
37942 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
37945 /* Emit the pshufhw. */
37946 memcpy (perm2 + 4, d->perm + 4, 4);
37947 for (i = 0; i < 4; ++i)
37949 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
37955 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37956 the permutation using the SSSE3 palignr instruction. This succeeds
37957 when all of the elements in PERM fit within one vector and we merely
37958 need to shift them down so that a single vector permutation has a
37959 chance to succeed. */
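/* E.g. the two-operand V8HImode permutation { 3 4 5 6 7 8 9 10 } has
   min 3 and max 10, so shifting the concatenated operands down by three
   elements leaves the residual permutation { 0 1 2 3 4 5 6 7 }, the
   degenerate identity case tested for below.  */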
37962 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
37964 unsigned i, nelt = d->nelt;
37969 /* Even with AVX, palignr only operates on 128-bit vectors. */
37970 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
37973 min = nelt, max = 0;
37974 for (i = 0; i < nelt; ++i)
37976 unsigned e = d->perm[i];
37982 if (min == 0 || max - min >= nelt)
37985 /* Given that we have SSSE3, we know we'll be able to implement the
37986 single operand permutation after the palignr with pshufb. */
37990 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
37991 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
37992 gen_lowpart (TImode, d->op1),
37993 gen_lowpart (TImode, d->op0), shift));
37995 d->op0 = d->op1 = d->target;
37996 d->one_operand_p = true;
37999 for (i = 0; i < nelt; ++i)
38001 unsigned e = d->perm[i] - min;
38007 /* Test for the degenerate case where the alignment by itself
38008 produces the desired permutation. */
38012 ok = expand_vec_perm_1 (d);
38018 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
38020 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38021 a two vector permutation into a single vector permutation by using
38022 an interleave operation to merge the vectors. */
38025 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
38027 struct expand_vec_perm_d dremap, dfinal;
38028 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
38029 unsigned HOST_WIDE_INT contents;
38030 unsigned char remap[2 * MAX_VECT_LEN];
38032 bool ok, same_halves = false;
38034 if (GET_MODE_SIZE (d->vmode) == 16)
38036 if (d->one_operand_p)
38039 else if (GET_MODE_SIZE (d->vmode) == 32)
38043 /* For 32-byte modes allow even d->one_operand_p.
38044 The lack of cross-lane shuffling in some instructions
38045 might prevent a single insn shuffle. */
38047 dfinal.testing_p = true;
38048 /* If expand_vec_perm_interleave3 can expand this into
38049 a 3 insn sequence, give up and let it be expanded as
38050 a 3 insn sequence. While that is one insn longer,
38051 it doesn't need a memory operand, and in the common
38052 case that both the interleave low and high permutations
38053 with the same operands are adjacent, it needs only 4 insns
38054 for both after CSE. */
38055 if (expand_vec_perm_interleave3 (&dfinal))
38061 /* Examine from whence the elements come. */
38063 for (i = 0; i < nelt; ++i)
38064 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
38066 memset (remap, 0xff, sizeof (remap));
38069 if (GET_MODE_SIZE (d->vmode) == 16)
38071 unsigned HOST_WIDE_INT h1, h2, h3, h4;
38073 /* Split the two input vectors into 4 halves. */
38074 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
38079 /* If the elements are all from the low halves, use interleave low;
38080 similarly for interleave high. If the elements are from mismatched
38081 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
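/* E.g. for V4SImode the four halves cover elements { 0 1 }, { 2 3 },
   { 4 5 } and { 6 7 } of the concatenated operands; a permutation such
   as { 1 4 0 5 } draws only on the two low halves, so an interleave low
   makes every needed element reachable by a single one-operand shuffle.  */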
38082 if ((contents & (h1 | h3)) == contents)
38085 for (i = 0; i < nelt2; ++i)
38088 remap[i + nelt] = i * 2 + 1;
38089 dremap.perm[i * 2] = i;
38090 dremap.perm[i * 2 + 1] = i + nelt;
38092 if (!TARGET_SSE2 && d->vmode == V4SImode)
38093 dremap.vmode = V4SFmode;
38095 else if ((contents & (h2 | h4)) == contents)
38098 for (i = 0; i < nelt2; ++i)
38100 remap[i + nelt2] = i * 2;
38101 remap[i + nelt + nelt2] = i * 2 + 1;
38102 dremap.perm[i * 2] = i + nelt2;
38103 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
38105 if (!TARGET_SSE2 && d->vmode == V4SImode)
38106 dremap.vmode = V4SFmode;
38108 else if ((contents & (h1 | h4)) == contents)
38111 for (i = 0; i < nelt2; ++i)
38114 remap[i + nelt + nelt2] = i + nelt2;
38115 dremap.perm[i] = i;
38116 dremap.perm[i + nelt2] = i + nelt + nelt2;
38121 dremap.vmode = V2DImode;
38123 dremap.perm[0] = 0;
38124 dremap.perm[1] = 3;
38127 else if ((contents & (h2 | h3)) == contents)
38130 for (i = 0; i < nelt2; ++i)
38132 remap[i + nelt2] = i;
38133 remap[i + nelt] = i + nelt2;
38134 dremap.perm[i] = i + nelt2;
38135 dremap.perm[i + nelt2] = i + nelt;
38140 dremap.vmode = V2DImode;
38142 dremap.perm[0] = 1;
38143 dremap.perm[1] = 2;
38151 unsigned int nelt4 = nelt / 4, nzcnt = 0;
38152 unsigned HOST_WIDE_INT q[8];
38153 unsigned int nonzero_halves[4];
38155 /* Split the two input vectors into 8 quarters. */
38156 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
38157 for (i = 1; i < 8; ++i)
38158 q[i] = q[0] << (nelt4 * i);
38159 for (i = 0; i < 4; ++i)
38160 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
38162 nonzero_halves[nzcnt] = i;
38168 gcc_assert (d->one_operand_p);
38169 nonzero_halves[1] = nonzero_halves[0];
38170 same_halves = true;
38172 else if (d->one_operand_p)
38174 gcc_assert (nonzero_halves[0] == 0);
38175 gcc_assert (nonzero_halves[1] == 1);
38180 if (d->perm[0] / nelt2 == nonzero_halves[1])
38182 /* Attempt to increase the likelihood that dfinal
38183 shuffle will be intra-lane. */
38184 char tmph = nonzero_halves[0];
38185 nonzero_halves[0] = nonzero_halves[1];
38186 nonzero_halves[1] = tmph;
38189 /* vperm2f128 or vperm2i128. */
38190 for (i = 0; i < nelt2; ++i)
38192 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
38193 remap[i + nonzero_halves[0] * nelt2] = i;
38194 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
38195 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
38198 if (d->vmode != V8SFmode
38199 && d->vmode != V4DFmode
38200 && d->vmode != V8SImode)
38202 dremap.vmode = V8SImode;
38204 for (i = 0; i < 4; ++i)
38206 dremap.perm[i] = i + nonzero_halves[0] * 4;
38207 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
38211 else if (d->one_operand_p)
38213 else if (TARGET_AVX2
38214 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
38217 for (i = 0; i < nelt4; ++i)
38220 remap[i + nelt] = i * 2 + 1;
38221 remap[i + nelt2] = i * 2 + nelt2;
38222 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
38223 dremap.perm[i * 2] = i;
38224 dremap.perm[i * 2 + 1] = i + nelt;
38225 dremap.perm[i * 2 + nelt2] = i + nelt2;
38226 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
38229 else if (TARGET_AVX2
38230 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
38233 for (i = 0; i < nelt4; ++i)
38235 remap[i + nelt4] = i * 2;
38236 remap[i + nelt + nelt4] = i * 2 + 1;
38237 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
38238 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
38239 dremap.perm[i * 2] = i + nelt4;
38240 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
38241 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
38242 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
38249 /* Use the remapping array set up above to move the elements from their
38250 swizzled locations into their final destinations. */
38252 for (i = 0; i < nelt; ++i)
38254 unsigned e = remap[d->perm[i]];
38255 gcc_assert (e < nelt);
38256 /* If same_halves is true, both halves of the remapped vector are the
38257 same. Avoid cross-lane accesses if possible. */
38258 if (same_halves && i >= nelt2)
38260 gcc_assert (e < nelt2);
38261 dfinal.perm[i] = e + nelt2;
38264 dfinal.perm[i] = e;
38266 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
38267 dfinal.op1 = dfinal.op0;
38268 dfinal.one_operand_p = true;
38269 dremap.target = dfinal.op0;
38271 /* Test if the final remap can be done with a single insn. For V4SFmode or
38272 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
38274 ok = expand_vec_perm_1 (&dfinal);
38275 seq = get_insns ();
38284 if (dremap.vmode != dfinal.vmode)
38286 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
38287 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
38288 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
38291 ok = expand_vec_perm_1 (&dremap);
38298 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38299 a single vector cross-lane permutation into vpermq followed
38300 by any of the single insn permutations. */
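/* E.g. if the low half of the result only reads bytes from quadwords
   0 and 2 of the source and the high half only from quadwords 1 and 3,
   the vpermq selector { 0 2 1 3 } moves each pair into the lane that
   needs it, leaving a purely in-lane byte shuffle for the second insn.  */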
38303 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
38305 struct expand_vec_perm_d dremap, dfinal;
38306 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
38307 unsigned contents[2];
38311 && (d->vmode == V32QImode || d->vmode == V16HImode)
38312 && d->one_operand_p))
38317 for (i = 0; i < nelt2; ++i)
38319 contents[0] |= 1u << (d->perm[i] / nelt4);
38320 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
38323 for (i = 0; i < 2; ++i)
38325 unsigned int cnt = 0;
38326 for (j = 0; j < 4; ++j)
38327 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
38335 dremap.vmode = V4DImode;
38337 dremap.target = gen_reg_rtx (V4DImode);
38338 dremap.op0 = gen_lowpart (V4DImode, d->op0);
38339 dremap.op1 = dremap.op0;
38340 dremap.one_operand_p = true;
38341 for (i = 0; i < 2; ++i)
38343 unsigned int cnt = 0;
38344 for (j = 0; j < 4; ++j)
38345 if ((contents[i] & (1u << j)) != 0)
38346 dremap.perm[2 * i + cnt++] = j;
38347 for (; cnt < 2; ++cnt)
38348 dremap.perm[2 * i + cnt] = 0;
38352 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
38353 dfinal.op1 = dfinal.op0;
38354 dfinal.one_operand_p = true;
38355 for (i = 0, j = 0; i < nelt; ++i)
38359 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
38360 if ((d->perm[i] / nelt4) == dremap.perm[j])
38362 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
38363 dfinal.perm[i] |= nelt4;
38365 gcc_unreachable ();
38368 ok = expand_vec_perm_1 (&dremap);
38371 ok = expand_vec_perm_1 (&dfinal);
38377 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
38378 a vector permutation using two instructions: vperm2f128 (or
38379 vperm2i128) followed by any single in-lane permutation. */
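/* The vperm2[fi]128 immediate encodes, in bits 0-1, the source of the
   low result lane (0/1 = the lanes of the first operand, 2/3 = the
   lanes of the second) and, in bits 4-5, the source of the high result
   lane; hence the ((perm << 2) | perm) & 0x33 encoding used below.  */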
38382 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
38384 struct expand_vec_perm_d dfirst, dsecond;
38385 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
38389 || GET_MODE_SIZE (d->vmode) != 32
38390 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
38394 dsecond.one_operand_p = false;
38395 dsecond.testing_p = true;
38397 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
38398 immediate. For perm < 16 the second permutation uses
38399 d->op0 as first operand, for perm >= 16 it uses d->op1
38400 as first operand. The second operand is the result of the vperm2[fi]128 insn. */
38402 for (perm = 0; perm < 32; perm++)
38404 /* Ignore permutations which do not move anything cross-lane. */
38407 /* The second shuffle for e.g. V4DFmode has
38408 0123 and ABCD operands.
38409 Ignore AB23, as 23 is already in the second lane
38410 of the first operand. */
38411 if ((perm & 0xc) == (1 << 2)) continue;
38412 /* And 01CD, as 01 is in the first lane of the first operand. */
38414 if ((perm & 3) == 0) continue;
38415 /* And 4567, as then the vperm2[fi]128 doesn't change
38416 anything on the original 4567 second operand. */
38417 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
38421 /* The second shuffle for e.g. V4DFmode has
38422 4567 and ABCD operands.
38423 Ignore AB67, as 67 is already in the second lane
38424 of the first operand. */
38425 if ((perm & 0xc) == (3 << 2)) continue;
38426 /* And 45CD, as 45 is in the first lane of the first operand. */
38428 if ((perm & 3) == 2) continue;
38429 /* And 0123, as then the vperm2[fi]128 doesn't change
38430 anything on the original 0123 first operand. */
38431 if ((perm & 0xf) == (1 << 2)) continue;
38434 for (i = 0; i < nelt; i++)
38436 j = d->perm[i] / nelt2;
38437 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
38438 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
38439 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
38440 dsecond.perm[i] = d->perm[i] & (nelt - 1);
38448 ok = expand_vec_perm_1 (&dsecond);
38459 /* Found a usable second shuffle. dfirst will be
38460 vperm2f128 on d->op0 and d->op1. */
38461 dsecond.testing_p = false;
38463 dfirst.target = gen_reg_rtx (d->vmode);
38464 for (i = 0; i < nelt; i++)
38465 dfirst.perm[i] = (i & (nelt2 - 1))
38466 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
38468 ok = expand_vec_perm_1 (&dfirst);
38471 /* And dsecond is some single insn shuffle, taking
38472 d->op0 and result of vperm2f128 (if perm < 16) or
38473 d->op1 and result of vperm2f128 (otherwise). */
38474 dsecond.op1 = dfirst.target;
38476 dsecond.op0 = dfirst.op1;
38478 ok = expand_vec_perm_1 (&dsecond);
38484 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
38485 if (d->one_operand_p)
38492 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38493 a two vector permutation using 2 intra-lane interleave insns
38494 and cross-lane shuffle for 32-byte vectors. */
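/* The shape recognized here is exactly an interleave of the two
   operands: e.g. for V32QImode the low variant is the permutation
   { 0 32 1 33 ... 15 47 } and the high variant { 16 48 17 49 ... 31 63 }.  */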
38497 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
38500 rtx (*gen) (rtx, rtx, rtx);
38502 if (d->one_operand_p)
38504 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
38506 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
38512 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
38514 for (i = 0; i < nelt; i += 2)
38515 if (d->perm[i] != d->perm[0] + i / 2
38516 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
38526 gen = gen_vec_interleave_highv32qi;
38528 gen = gen_vec_interleave_lowv32qi;
38532 gen = gen_vec_interleave_highv16hi;
38534 gen = gen_vec_interleave_lowv16hi;
38538 gen = gen_vec_interleave_highv8si;
38540 gen = gen_vec_interleave_lowv8si;
38544 gen = gen_vec_interleave_highv4di;
38546 gen = gen_vec_interleave_lowv4di;
38550 gen = gen_vec_interleave_highv8sf;
38552 gen = gen_vec_interleave_lowv8sf;
38556 gen = gen_vec_interleave_highv4df;
38558 gen = gen_vec_interleave_lowv4df;
38561 gcc_unreachable ();
38564 emit_insn (gen (d->target, d->op0, d->op1));
38568 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
38569 a single vector permutation using a single intra-lane vector
38570 permutation, vperm2f128 swapping the lanes and vblend* insn blending
38571 the non-swapped and swapped vectors together. */
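/* E.g. for the one-operand V4DFmode permutation { 0 3 2 1 }, elements
   0 and 2 already sit in their home lanes while 1 and 3 must cross, so
   dfirst places each requested element within its current lane, dsecond
   swaps the two lanes, and a blendpd selecting positions 1 and 3 from
   the swapped copy produces the result.  */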
38574 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
38576 struct expand_vec_perm_d dfirst, dsecond;
38577 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
38580 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
38584 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
38585 || !d->one_operand_p)
38589 for (i = 0; i < nelt; i++)
38590 dfirst.perm[i] = 0xff;
38591 for (i = 0, msk = 0; i < nelt; i++)
38593 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
38594 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
38596 dfirst.perm[j] = d->perm[i];
38600 for (i = 0; i < nelt; i++)
38601 if (dfirst.perm[i] == 0xff)
38602 dfirst.perm[i] = i;
38605 dfirst.target = gen_reg_rtx (dfirst.vmode);
38608 ok = expand_vec_perm_1 (&dfirst);
38609 seq = get_insns ();
38621 dsecond.op0 = dfirst.target;
38622 dsecond.op1 = dfirst.target;
38623 dsecond.one_operand_p = true;
38624 dsecond.target = gen_reg_rtx (dsecond.vmode);
38625 for (i = 0; i < nelt; i++)
38626 dsecond.perm[i] = i ^ nelt2;
38628 ok = expand_vec_perm_1 (&dsecond);
38631 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
38632 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
38636 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
38637 permutation using two vperm2f128, followed by a vshufpd insn blending
38638 the two vectors together. */
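/* E.g. for the V4DFmode permutation { 1 2 3 0 } this builds
   dfirst = { 0 1 2 3 }, dsecond = { 2 3 0 1 } and dthird = { 1 4 3 6 },
   the last being a vshufpd that takes the even result positions from
   dfirst and the odd ones from dsecond.  */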
38641 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
38643 struct expand_vec_perm_d dfirst, dsecond, dthird;
38646 if (!TARGET_AVX || (d->vmode != V4DFmode))
38656 dfirst.perm[0] = (d->perm[0] & ~1);
38657 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
38658 dfirst.perm[2] = (d->perm[2] & ~1);
38659 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
38660 dsecond.perm[0] = (d->perm[1] & ~1);
38661 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
38662 dsecond.perm[2] = (d->perm[3] & ~1);
38663 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
38664 dthird.perm[0] = (d->perm[0] % 2);
38665 dthird.perm[1] = (d->perm[1] % 2) + 4;
38666 dthird.perm[2] = (d->perm[2] % 2) + 2;
38667 dthird.perm[3] = (d->perm[3] % 2) + 6;
38669 dfirst.target = gen_reg_rtx (dfirst.vmode);
38670 dsecond.target = gen_reg_rtx (dsecond.vmode);
38671 dthird.op0 = dfirst.target;
38672 dthird.op1 = dsecond.target;
38673 dthird.one_operand_p = false;
38675 canonicalize_perm (&dfirst);
38676 canonicalize_perm (&dsecond);
38678 ok = expand_vec_perm_1 (&dfirst)
38679 && expand_vec_perm_1 (&dsecond)
38680 && expand_vec_perm_1 (&dthird);
38687 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
38688 permutation with two pshufb insns and an ior. We should have already
38689 failed all two instruction sequences. */
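/* E.g. for a two-operand V16QImode extract-even, the first mask is
   { 0 2 ... 14 -128 ... -128 }, shuffling op0's even bytes into the low
   half and zeroing the rest, the second is { -128 ... -128 0 2 ... 14 }
   doing the same for op1 into the high half, and the final por merges
   the two pshufb results.  */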
38692 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
38694 rtx rperm[2][16], vperm, l, h, op, m128;
38695 unsigned int i, nelt, eltsz;
38697 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
38699 gcc_assert (!d->one_operand_p);
38702 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
38704 /* Generate two permutation masks. If the required element is within
38705 the given vector it is shuffled into the proper lane. If the required
38706 element is in the other vector, force a zero into the lane by setting
38707 bit 7 in the permutation mask. */
38708 m128 = GEN_INT (-128);
38709 for (i = 0; i < nelt; ++i)
38711 unsigned j, e = d->perm[i];
38712 unsigned which = (e >= nelt);
38716 for (j = 0; j < eltsz; ++j)
38718 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
38719 rperm[1-which][i*eltsz + j] = m128;
38723 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
38724 vperm = force_reg (V16QImode, vperm);
38726 l = gen_reg_rtx (V16QImode);
38727 op = gen_lowpart (V16QImode, d->op0);
38728 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
38730 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
38731 vperm = force_reg (V16QImode, vperm);
38733 h = gen_reg_rtx (V16QImode);
38734 op = gen_lowpart (V16QImode, d->op1);
38735 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
38737 op = gen_lowpart (V16QImode, d->target);
38738 emit_insn (gen_iorv16qi3 (op, l, h));
38743 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
38744 with two vpshufb insns, vpermq and vpor. We should have already failed
38745 all two or three instruction sequences. */
38748 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
38750 rtx rperm[2][32], vperm, l, h, hp, op, m128;
38751 unsigned int i, nelt, eltsz;
38754 || !d->one_operand_p
38755 || (d->vmode != V32QImode && d->vmode != V16HImode))
38762 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
38764 /* Generate two permutation masks. If the required element is within
38765 the same lane, it is shuffled in. If the required element is from
38766 the other lane, force a zero by setting bit 7 in the permutation
38767 mask. In the other mask, elements are non-negative wherever an
38768 element is requested from the other lane, but they are also moved
38769 to the other lane, so that the result of vpshufb can have its two V2TImode halves swapped. */
38771 m128 = GEN_INT (-128);
38772 for (i = 0; i < nelt; ++i)
38774 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
38775 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
38777 for (j = 0; j < eltsz; ++j)
38779 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
38780 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
38784 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
38785 vperm = force_reg (V32QImode, vperm);
38787 h = gen_reg_rtx (V32QImode);
38788 op = gen_lowpart (V32QImode, d->op0);
38789 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
38791 /* Swap the 128-bit lanes of h into hp. */
38792 hp = gen_reg_rtx (V4DImode);
38793 op = gen_lowpart (V4DImode, h);
38794 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
38797 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
38798 vperm = force_reg (V32QImode, vperm);
38800 l = gen_reg_rtx (V32QImode);
38801 op = gen_lowpart (V32QImode, d->op0);
38802 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
38804 op = gen_lowpart (V32QImode, d->target);
38805 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
38810 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
38811 and extract-odd permutations of two V32QImode and V16QImode operand
38812 with two vpshufb insns, vpor and vpermq. We should have already
38813 failed all two or three instruction sequences. */
38816 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
38818 rtx rperm[2][32], vperm, l, h, ior, op, m128;
38819 unsigned int i, nelt, eltsz;
38822 || d->one_operand_p
38823 || (d->vmode != V32QImode && d->vmode != V16HImode))
38826 for (i = 0; i < d->nelt; ++i)
38827 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
38834 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
38836 /* Generate two permutation masks. In the first permutation mask
38837 the first quarter will contain indexes for the first half
38838 of the op0, the second quarter will contain bit 7 set, third quarter
38839 will contain indexes for the second half of the op0 and the
38840 last quarter bit 7 set. In the second permutation mask
38841 the first quarter will contain bit 7 set, the second quarter
38842 indexes for the first half of the op1, the third quarter bit 7 set
38843 and last quarter indexes for the second half of the op1.
38844 I.e. the first mask e.g. for V32QImode extract even will be:
38845 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
38846 (all values masked with 0xf except for -128) and second mask
38847 for extract even will be
38848 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
38849 m128 = GEN_INT (-128);
38850 for (i = 0; i < nelt; ++i)
38852 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
38853 unsigned which = d->perm[i] >= nelt;
38854 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
38856 for (j = 0; j < eltsz; ++j)
38858 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
38859 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
38863 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
38864 vperm = force_reg (V32QImode, vperm);
38866 l = gen_reg_rtx (V32QImode);
38867 op = gen_lowpart (V32QImode, d->op0);
38868 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
38870 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
38871 vperm = force_reg (V32QImode, vperm);
38873 h = gen_reg_rtx (V32QImode);
38874 op = gen_lowpart (V32QImode, d->op1);
38875 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
38877 ior = gen_reg_rtx (V32QImode);
38878 emit_insn (gen_iorv32qi3 (ior, l, h));
38880 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
38881 op = gen_lowpart (V4DImode, d->target);
38882 ior = gen_lowpart (V4DImode, ior);
38883 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
38884 const1_rtx, GEN_INT (3)));
38889 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
38890 and extract-odd permutations. */
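/* On the concatenated operands, extract-even is the permutation
   { 0 2 4 ... 2N-2 } and extract-odd is { 1 3 5 ... 2N-1 }; the ODD
   argument selects between them below.  */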
38893 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
38900 t1 = gen_reg_rtx (V4DFmode);
38901 t2 = gen_reg_rtx (V4DFmode);
38903 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
38904 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
38905 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
38907 /* Now an unpck[lh]pd will produce the result required. */
38909 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
38911 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
38917 int mask = odd ? 0xdd : 0x88;
38919 t1 = gen_reg_rtx (V8SFmode);
38920 t2 = gen_reg_rtx (V8SFmode);
38921 t3 = gen_reg_rtx (V8SFmode);
38923 /* Shuffle within the 128-bit lanes to produce:
38924 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
38925 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
38928 /* Shuffle the lanes around to produce:
38929 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
38930 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
38933 /* Shuffle within the 128-bit lanes to produce:
38934 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
38935 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
38937 /* Shuffle within the 128-bit lanes to produce:
38938 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
38939 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
38941 /* Shuffle the lanes around to produce:
38942 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
38943 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
38952 /* These are always directly implementable by expand_vec_perm_1. */
38953 gcc_unreachable ();
38957 return expand_vec_perm_pshufb2 (d);
38960 /* We need 2*log2(N)-1 operations to achieve odd/even
38961 with interleave. */
38962 t1 = gen_reg_rtx (V8HImode);
38963 t2 = gen_reg_rtx (V8HImode);
38964 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
38965 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
38966 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
38967 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
38969 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
38971 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
38978 return expand_vec_perm_pshufb2 (d);
38981 t1 = gen_reg_rtx (V16QImode);
38982 t2 = gen_reg_rtx (V16QImode);
38983 t3 = gen_reg_rtx (V16QImode);
38984 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
38985 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
38986 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
38987 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
38988 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
38989 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
38991 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
38993 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
39000 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
39005 struct expand_vec_perm_d d_copy = *d;
39006 d_copy.vmode = V4DFmode;
39007 d_copy.target = gen_lowpart (V4DFmode, d->target);
39008 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
39009 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
39010 return expand_vec_perm_even_odd_1 (&d_copy, odd);
39013 t1 = gen_reg_rtx (V4DImode);
39014 t2 = gen_reg_rtx (V4DImode);
39016 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
39017 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
39018 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
39020 /* Now a vpunpck[lh]qdq will produce the result required. */
39022 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
39024 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
39031 struct expand_vec_perm_d d_copy = *d;
39032 d_copy.vmode = V8SFmode;
39033 d_copy.target = gen_lowpart (V8SFmode, d->target);
39034 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
39035 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
39036 return expand_vec_perm_even_odd_1 (&d_copy, odd);
39039 t1 = gen_reg_rtx (V8SImode);
39040 t2 = gen_reg_rtx (V8SImode);
39042 /* Shuffle the lanes around into
39043 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
39044 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
39045 gen_lowpart (V4DImode, d->op0),
39046 gen_lowpart (V4DImode, d->op1),
39048 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
39049 gen_lowpart (V4DImode, d->op0),
39050 gen_lowpart (V4DImode, d->op1),
39053 /* Swap the 2nd and 3rd position in each lane into
39054 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
39055 emit_insn (gen_avx2_pshufdv3 (t1, t1,
39056 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
39057 emit_insn (gen_avx2_pshufdv3 (t2, t2,
39058 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
39060 /* Now a vpunpck[lh]qdq will produce
39061 { 0 2 4 6 8 a c e } or { 1 3 5 7 9 b d f } respectively. */
39063 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
39064 gen_lowpart (V4DImode, t1),
39065 gen_lowpart (V4DImode, t2));
39067 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
39068 gen_lowpart (V4DImode, t1),
39069 gen_lowpart (V4DImode, t2));
39074 gcc_unreachable ();
39080 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
39081 extract-even and extract-odd permutations. */
39084 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
39086 unsigned i, odd, nelt = d->nelt;
39089 if (odd != 0 && odd != 1)
39092 for (i = 1; i < nelt; ++i)
39093 if (d->perm[i] != 2 * i + odd)
39096 return expand_vec_perm_even_odd_1 (d, odd);
39099 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
39100 permutations. We assume that expand_vec_perm_1 has already failed. */
39103 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
39105 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
39106 enum machine_mode vmode = d->vmode;
39107 unsigned char perm2[4];
39115 /* These are special-cased in sse.md so that we can optionally
39116 use the vbroadcast instruction. They expand to two insns
39117 if the input happens to be in a register. */
39118 gcc_unreachable ();
39124 /* These are always implementable using standard shuffle patterns. */
39125 gcc_unreachable ();
39129 /* These can be implemented via interleave. We save one insn by
39130 stopping once we have promoted to V4SImode and then using pshufd. */
39134 rtx (*gen) (rtx, rtx, rtx)
39135 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
39136 : gen_vec_interleave_lowv8hi;
39140 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
39141 : gen_vec_interleave_highv8hi;
39146 dest = gen_reg_rtx (vmode);
39147 emit_insn (gen (dest, op0, op0));
39148 vmode = get_mode_wider_vector (vmode);
39149 op0 = gen_lowpart (vmode, dest);
39151 while (vmode != V4SImode);
39153 memset (perm2, elt, 4);
39154 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
39163 /* For AVX2, broadcasts of the first element should already have been
39164 handled by expand_vec_perm_1 via vpbroadcast* or vpermq. */
39165 gcc_assert (!TARGET_AVX2 || d->perm[0]);
39169 gcc_unreachable ();
39173 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
39174 broadcast permutations. */
39177 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
39179 unsigned i, elt, nelt = d->nelt;
39181 if (!d->one_operand_p)
39185 for (i = 1; i < nelt; ++i)
39186 if (d->perm[i] != elt)
39189 return expand_vec_perm_broadcast_1 (d);
39192 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
39193 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
39194 all the shorter instruction sequences. */
39197 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
39199 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
39200 unsigned int i, nelt, eltsz;
39204 || d->one_operand_p
39205 || (d->vmode != V32QImode && d->vmode != V16HImode))
39212 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
39214 /* Generate 4 permutation masks. If the required element is within
39215 the same lane, it is shuffled in. If the required element is from
39216 the other lane, force a zero by setting bit 7 in the permutation
39217 mask. In the other masks, elements are non-negative wherever an
39218 element is requested from the other lane, but also moved to the
39219 other lane, so that the result of vpshufb can have its two V2TImode halves swapped. */
39221 m128 = GEN_INT (-128);
39222 for (i = 0; i < 32; ++i)
39224 rperm[0][i] = m128;
39225 rperm[1][i] = m128;
39226 rperm[2][i] = m128;
39227 rperm[3][i] = m128;
39233 for (i = 0; i < nelt; ++i)
39235 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
39236 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
39237 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
39239 for (j = 0; j < eltsz; ++j)
39240 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
39241 used[which] = true;
39244 for (i = 0; i < 2; ++i)
39246 if (!used[2 * i + 1])
39251 vperm = gen_rtx_CONST_VECTOR (V32QImode,
39252 gen_rtvec_v (32, rperm[2 * i + 1]));
39253 vperm = force_reg (V32QImode, vperm);
39254 h[i] = gen_reg_rtx (V32QImode);
39255 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
39256 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
39259 /* Swap the 128-bit lanes of h[X]. */
39260 for (i = 0; i < 2; ++i)
39262 if (h[i] == NULL_RTX)
39264 op = gen_reg_rtx (V4DImode);
39265 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
39266 const2_rtx, GEN_INT (3), const0_rtx,
39268 h[i] = gen_lowpart (V32QImode, op);
39271 for (i = 0; i < 2; ++i)
39278 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
39279 vperm = force_reg (V32QImode, vperm);
39280 l[i] = gen_reg_rtx (V32QImode);
39281 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
39282 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
39285 for (i = 0; i < 2; ++i)
39289 op = gen_reg_rtx (V32QImode);
39290 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
39297 gcc_assert (l[0] && l[1]);
39298 op = gen_lowpart (V32QImode, d->target);
39299 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
39303 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
39304 With all of the interface bits taken care of, perform the expansion
39305 in D and return true on success. */
39308 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
39310 /* Try a single instruction expansion. */
39311 if (expand_vec_perm_1 (d))
39314 /* Try sequences of two instructions. */
39316 if (expand_vec_perm_pshuflw_pshufhw (d))
39319 if (expand_vec_perm_palignr (d))
39322 if (expand_vec_perm_interleave2 (d))
39325 if (expand_vec_perm_broadcast (d))
39328 if (expand_vec_perm_vpermq_perm_1 (d))
39331 if (expand_vec_perm_vperm2f128 (d))
39334 /* Try sequences of three instructions. */
39336 if (expand_vec_perm_2vperm2f128_vshuf (d))
39339 if (expand_vec_perm_pshufb2 (d))
39342 if (expand_vec_perm_interleave3 (d))
39345 if (expand_vec_perm_vperm2f128_vblend (d))
39348 /* Try sequences of four instructions. */
39350 if (expand_vec_perm_vpshufb2_vpermq (d))
39353 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
39356 /* ??? Look for narrow permutations whose element orderings would
39357 allow the promotion to a wider mode. */
39359 /* ??? Look for sequences of interleave or a wider permute that place
39360 the data into the correct lanes for a half-vector shuffle like
39361 pshuf[lh]w or vpermilps. */
39363 /* ??? Look for sequences of interleave that produce the desired results.
39364 The combinatorics of punpck[lh] get pretty ugly... */
39366 if (expand_vec_perm_even_odd (d))
39369 /* Even longer sequences. */
39370 if (expand_vec_perm_vpshufb4_vpermq2 (d))
39376 /* If a permutation only uses one operand, make it clear. Returns true
39377 if the permutation references both operands. */
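/* E.g. the V4SImode selector { 0 5 2 7 } references both operands, but
   when op0 and op1 are the same register it is folded to the
   one-operand form { 0 1 2 3 } by masking each index with nelt - 1.  */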
39380 canonicalize_perm (struct expand_vec_perm_d *d)
39382 int i, which, nelt = d->nelt;
39384 for (i = which = 0; i < nelt; ++i)
39385 which |= (d->perm[i] < nelt ? 1 : 2);
39387 d->one_operand_p = true;
39394 if (!rtx_equal_p (d->op0, d->op1))
39396 d->one_operand_p = false;
39399 /* The elements of PERM do not suggest that only the first operand
39400 is used, but both operands are identical. Allow easier matching
39401 of the permutation by folding the permutation into the single
39406 for (i = 0; i < nelt; ++i)
39407 d->perm[i] &= nelt - 1;
39416 return (which == 3);
39420 ix86_expand_vec_perm_const (rtx operands[4])
39422 struct expand_vec_perm_d d;
39423 unsigned char perm[MAX_VECT_LEN];
39428 d.target = operands[0];
39429 d.op0 = operands[1];
39430 d.op1 = operands[2];
39433 d.vmode = GET_MODE (d.target);
39434 gcc_assert (VECTOR_MODE_P (d.vmode));
39435 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
39436 d.testing_p = false;
39438 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
39439 gcc_assert (XVECLEN (sel, 0) == nelt);
39440 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
39442 for (i = 0; i < nelt; ++i)
39444 rtx e = XVECEXP (sel, 0, i);
39445 int ei = INTVAL (e) & (2 * nelt - 1);
39450 two_args = canonicalize_perm (&d);
39452 if (ix86_expand_vec_perm_const_1 (&d))
39455 /* If the selector says both arguments are needed, but the operands are the
39456 same, the above tried to expand with one_operand_p and flattened selector.
39457 If that didn't work, retry without one_operand_p; we succeeded with that during testing. */
39459 if (two_args && d.one_operand_p)
39461 d.one_operand_p = false;
39462 memcpy (d.perm, perm, sizeof (perm));
39463 return ix86_expand_vec_perm_const_1 (&d);
39469 /* Implement targetm.vectorize.vec_perm_const_ok. */
39472 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
39473 const unsigned char *sel)
39475 struct expand_vec_perm_d d;
39476 unsigned int i, nelt, which;
39480 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
39481 d.testing_p = true;
39483 /* Given sufficient ISA support we can just return true here
39484 for selected vector modes. */
39485 if (GET_MODE_SIZE (d.vmode) == 16)
39487 /* All implementable with a single vpperm insn. */
39490 /* All implementable with 2 pshufb + 1 ior. */
39493 /* All implementable with shufpd or unpck[lh]pd. */
39498 /* Extract the values from the vector CST into the permutation array in D. */
39500 memcpy (d.perm, sel, nelt);
39501 for (i = which = 0; i < nelt; ++i)
39503 unsigned char e = d.perm[i];
39504 gcc_assert (e < 2 * nelt);
39505 which |= (e < nelt ? 1 : 2);
39508 /* For all elements from the second vector, fold the elements to the first. */
39510 for (i = 0; i < nelt; ++i)
39513 /* Check whether the mask can be applied to the vector type. */
39514 d.one_operand_p = (which != 3);
39516 /* Implementable with shufps or pshufd. */
39517 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
39520 /* Otherwise we have to go through the motions and see if we can
39521 figure out how to generate the requested permutation. */
39522 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
39523 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
39524 if (!d.one_operand_p)
39525 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
39528 ret = ix86_expand_vec_perm_const_1 (&d);
39535 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
39537 struct expand_vec_perm_d d;
39543 d.vmode = GET_MODE (targ);
39544 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
39545 d.one_operand_p = false;
39546 d.testing_p = false;
39548 for (i = 0; i < nelt; ++i)
39549 d.perm[i] = i * 2 + odd;
39551 /* We'll either be able to implement the permutation directly... */
39552 if (expand_vec_perm_1 (&d))
39555 /* ... or we use the special-case patterns. */
39556 expand_vec_perm_even_odd_1 (&d, odd);
39560 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
39562 struct expand_vec_perm_d d;
39563 unsigned i, nelt, base;
39569 d.vmode = GET_MODE (targ);
39570 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
39571 d.one_operand_p = false;
39572 d.testing_p = false;
39574 base = high_p ? nelt / 2 : 0;
39575 for (i = 0; i < nelt / 2; ++i)
39577 d.perm[i * 2] = i + base;
39578 d.perm[i * 2 + 1] = i + base + nelt;
39581 /* Note that for AVX this isn't one instruction. */
39582 ok = ix86_expand_vec_perm_const_1 (&d);
39587 /* Expand a vector operation CODE for a V*QImode in terms of the
39588 same operation on V*HImode. */
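/* E.g. a V16QImode multiply becomes two V8HImode multiplies on the
   interleave-low and interleave-high unpackings of the operands; the
   byte-sized products then sit in the even byte positions of the two
   intermediate vectors, and a constant permutation gathers them back
   into the destination.  */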
39591 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
39593 enum machine_mode qimode = GET_MODE (dest);
39594 enum machine_mode himode;
39595 rtx (*gen_il) (rtx, rtx, rtx);
39596 rtx (*gen_ih) (rtx, rtx, rtx);
39597 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
39598 struct expand_vec_perm_d d;
39599 bool ok, full_interleave;
39600 bool uns_p = false;
39607 gen_il = gen_vec_interleave_lowv16qi;
39608 gen_ih = gen_vec_interleave_highv16qi;
39611 himode = V16HImode;
39612 gen_il = gen_avx2_interleave_lowv32qi;
39613 gen_ih = gen_avx2_interleave_highv32qi;
39616 gcc_unreachable ();
39619 op2_l = op2_h = op2;
39623 /* Unpack data such that we've got a source byte in each low byte of
39624 each word. We don't care what goes into the high byte of each word.
39625 Rather than trying to get zero in there, most convenient is to let
39626 it be a copy of the low byte. */
39627 op2_l = gen_reg_rtx (qimode);
39628 op2_h = gen_reg_rtx (qimode);
39629 emit_insn (gen_il (op2_l, op2, op2));
39630 emit_insn (gen_ih (op2_h, op2, op2));
39633 op1_l = gen_reg_rtx (qimode);
39634 op1_h = gen_reg_rtx (qimode);
39635 emit_insn (gen_il (op1_l, op1, op1));
39636 emit_insn (gen_ih (op1_h, op1, op1));
39637 full_interleave = qimode == V16QImode;
39645 op1_l = gen_reg_rtx (himode);
39646 op1_h = gen_reg_rtx (himode);
39647 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
39648 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
39649 full_interleave = true;
39652 gcc_unreachable ();
39655 /* Perform the operation. */
39656 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
39658 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
39660 gcc_assert (res_l && res_h);
39662 /* Merge the data back into the right place. */
39664 d.op0 = gen_lowpart (qimode, res_l);
39665 d.op1 = gen_lowpart (qimode, res_h);
39667 d.nelt = GET_MODE_NUNITS (qimode);
39668 d.one_operand_p = false;
39669 d.testing_p = false;
39671 if (full_interleave)
39673 /* For SSE2, we used a full interleave, so the desired
39674 results are in the even elements. */
39675 for (i = 0; i < 32; ++i)
39680 /* For AVX, the interleave used above was not cross-lane. So the
39681 extraction is of the even elements, but with the second and third quarters swapped.
39682 Happily, that is even one insn shorter than even extraction. */
39683 for (i = 0; i < 32; ++i)
39684 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
39687 ok = ix86_expand_vec_perm_const_1 (&d);
39690 set_unique_reg_note (get_last_insn (), REG_EQUAL,
39691 gen_rtx_fmt_ee (code, qimode, op1, op2));
39695 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
39696 bool uns_p, bool odd_p)
39698 enum machine_mode mode = GET_MODE (op1);
39699 enum machine_mode wmode = GET_MODE (dest);
39702 /* We only play even/odd games with vectors of SImode. */
39703 gcc_assert (mode == V4SImode || mode == V8SImode);
39705 /* If we're looking for the odd results, shift those members down to
39706 the even slots. For some cpus this is faster than a PSHUFD. */
39709 if (TARGET_XOP && mode == V4SImode)
39711 x = force_reg (wmode, CONST0_RTX (wmode));
39712 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
39716 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
39717 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
39718 x, NULL, 1, OPTAB_DIRECT);
39719 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
39720 x, NULL, 1, OPTAB_DIRECT);
39721 op1 = gen_lowpart (mode, op1);
39722 op2 = gen_lowpart (mode, op2);
39725 if (mode == V8SImode)
39728 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
39730 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
39733 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
39734 else if (TARGET_SSE4_1)
39735 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
39738 rtx s1, s2, t0, t1, t2;
39740 /* The easiest way to implement this without PMULDQ is to go through
39741 the motions as if we are performing a full 64-bit multiply, except
39742 that we need to do less shuffling of the elements. */
39744 /* Compute the sign-extension, aka highparts, of the two operands. */
39745 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
39746 op1, pc_rtx, pc_rtx);
39747 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
39748 op2, pc_rtx, pc_rtx);
39750 /* Multiply LO(A) * HI(B), and vice-versa. */
39751 t1 = gen_reg_rtx (wmode);
39752 t2 = gen_reg_rtx (wmode);
39753 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
39754 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
39756 /* Multiply LO(A) * LO(B). */
39757 t0 = gen_reg_rtx (wmode);
39758 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
39760 /* Combine and shift the highparts into place. */
39761 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
39762 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
39765 /* Combine high and low parts. */
39766 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
39773 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
39774 bool uns_p, bool high_p)
39776 enum machine_mode wmode = GET_MODE (dest);
39777 enum machine_mode mode = GET_MODE (op1);
39778 rtx t1, t2, t3, t4, mask;
39783 t1 = gen_reg_rtx (mode);
39784 t2 = gen_reg_rtx (mode);
39785 if (TARGET_XOP && !uns_p)
39787 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
39788 shuffle the elements once so that all elements are in the right
39789 place for immediate use: { A C B D }. */
39790 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
39791 const1_rtx, GEN_INT (3)));
39792 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
39793 const1_rtx, GEN_INT (3)));
39797 /* Put the elements into place for the multiply. */
39798 ix86_expand_vec_interleave (t1, op1, op1, high_p);
39799 ix86_expand_vec_interleave (t2, op2, op2, high_p);
39802 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
39806 /* Shuffle the elements between the lanes. After this we
39807 have { A B E F | C D G H } for each operand. */
39808 t1 = gen_reg_rtx (V4DImode);
39809 t2 = gen_reg_rtx (V4DImode);
39810 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
39811 const0_rtx, const2_rtx,
39812 const1_rtx, GEN_INT (3)));
39813 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
39814 const0_rtx, const2_rtx,
39815 const1_rtx, GEN_INT (3)));
39817 /* Shuffle the elements within the lanes. After this we
39818 have { A A B B | C C D D } or { E E F F | G G H H }. */
39819 t3 = gen_reg_rtx (V8SImode);
39820 t4 = gen_reg_rtx (V8SImode);
39821 mask = GEN_INT (high_p
39822 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
39823 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
39824 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
39825 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
39827 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
39832 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
39833 uns_p, OPTAB_DIRECT);
39834 t2 = expand_binop (mode,
39835 uns_p ? umul_highpart_optab : smul_highpart_optab,
39836 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
39837 gcc_assert (t1 && t2);
39839 ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
39844 t1 = gen_reg_rtx (wmode);
39845 t2 = gen_reg_rtx (wmode);
39846 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
39847 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
39849 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
39853 gcc_unreachable ();
39858 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
39862 res_1 = gen_reg_rtx (V4SImode);
39863 res_2 = gen_reg_rtx (V4SImode);
39864 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
39865 op1, op2, true, false);
39866 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
39867 op1, op2, true, true);
39869 /* Move the results in element 2 down to element 1; we don't care
39870 what goes in elements 2 and 3. Then we can merge the parts
39871 back together with an interleave.
39873 Note that two other sequences were tried:
39874 (1) Use interleaves at the start instead of psrldq, which allows
39875 us to use a single shufps to merge things back at the end.
39876 (2) Use shufps here to combine the two vectors, then pshufd to
39877 put the elements in the correct order.
39878 In both cases the cost of the reformatting stall was too high
39879 and the overall sequence slower. */
39881 emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
39882 const0_rtx, const0_rtx));
39883 emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
39884 const0_rtx, const0_rtx));
39885 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
39887 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
39891 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
39893 enum machine_mode mode = GET_MODE (op0);
39894 rtx t1, t2, t3, t4, t5, t6;
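/* Both sequences below rely on the schoolbook decomposition
   (hi * 2^32 + lo) * (hi' * 2^32 + lo')
     == lo * lo' + ((hi * lo' + hi' * lo) << 32)  (mod 2^64),
   since the hardware only provides 32x32->64 bit multiplies.  */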
39896 if (TARGET_XOP && mode == V2DImode)
39898 /* op1: A,B,C,D, op2: E,F,G,H */
39899 op1 = gen_lowpart (V4SImode, op1);
39900 op2 = gen_lowpart (V4SImode, op2);
39902 t1 = gen_reg_rtx (V4SImode);
39903 t2 = gen_reg_rtx (V4SImode);
39904 t3 = gen_reg_rtx (V2DImode);
39905 t4 = gen_reg_rtx (V2DImode);
39908 emit_insn (gen_sse2_pshufd_1 (t1, op1,
39914 /* t2: (B*E),(A*F),(D*G),(C*H) */
39915 emit_insn (gen_mulv4si3 (t2, t1, op2));
39917 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
39918 emit_insn (gen_xop_phadddq (t3, t2));
39920 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
39921 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
39923 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
39924 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
39928 enum machine_mode nmode;
39929 rtx (*umul) (rtx, rtx, rtx);
39931 if (mode == V2DImode)
39933 umul = gen_vec_widen_umult_even_v4si;
39936 else if (mode == V4DImode)
39938 umul = gen_vec_widen_umult_even_v8si;
39942 gcc_unreachable ();
39945 /* Multiply low parts. */
39946 t1 = gen_reg_rtx (mode);
39947 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
39949 /* Shift input vectors right 32 bits so we can multiply high parts. */
39951 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
39952 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
39954 /* Multiply high parts by low parts. */
39955 t4 = gen_reg_rtx (mode);
39956 t5 = gen_reg_rtx (mode);
39957 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
39958 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
39960 /* Combine and shift the highparts back. */
39961 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
39962 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
39964 /* Combine high and low parts. */
39965 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
39968 set_unique_reg_note (get_last_insn (), REG_EQUAL,
39969 gen_rtx_MULT (mode, op1, op2));
39972 /* Expand an insert into a vector register through pinsr insn.
39973 Return true if successful. */
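/* E.g. inserting a 16-bit value at bit offset 32 of a V8HImode
   destination uses pinsrw on element 2, i.e. a selector of 1 << 2 once
   POS has been scaled from a bit offset down to an element index.  */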
39976 ix86_expand_pinsr (rtx *operands)
39978 rtx dst = operands[0];
39979 rtx src = operands[3];
39981 unsigned int size = INTVAL (operands[1]);
39982 unsigned int pos = INTVAL (operands[2]);
39984 if (GET_CODE (dst) == SUBREG)
39986 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
39987 dst = SUBREG_REG (dst);
39990 if (GET_CODE (src) == SUBREG)
39991 src = SUBREG_REG (src);
39993 switch (GET_MODE (dst))
40000 enum machine_mode srcmode, dstmode;
40001 rtx (*pinsr)(rtx, rtx, rtx, rtx);
40003 srcmode = mode_for_size (size, MODE_INT, 0);
40008 if (!TARGET_SSE4_1)
40009 return false;
40010 dstmode = V16QImode;
40011 pinsr = gen_sse4_1_pinsrb;
40017 dstmode = V8HImode;
40018 pinsr = gen_sse2_pinsrw;
40022 if (!TARGET_SSE4_1)
40023 return false;
40024 dstmode = V4SImode;
40025 pinsr = gen_sse4_1_pinsrd;
40029 gcc_assert (TARGET_64BIT);
40030 if (!TARGET_SSE4_1)
40031 return false;
40032 dstmode = V2DImode;
40033 pinsr = gen_sse4_1_pinsrq;
40040 dst = gen_lowpart (dstmode, dst);
40041 src = gen_lowpart (srcmode, src);
40045 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
40046 return true;
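/* Example (added, illustrative): inserting a 16-bit SRC at bit
   position 48 of a 64-bit DST selects V8HImode and pinsrw above; the
   immediate passed to the pattern is a one-hot lane selector, so the
   insert lands in lane 48/16 = 3 via the selector 1 << 3.  */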
40054 /* This function returns the calling-ABI-specific va_list type node.
40055 It returns the FNDECL-specific va_list type. */
40057 static tree
40058 ix86_fn_abi_va_list (tree fndecl)
40060 if (!TARGET_64BIT)
40061 return va_list_type_node;
40062 gcc_assert (fndecl != NULL_TREE);
40064 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
40065 return ms_va_list_type_node;
40067 return sysv_va_list_type_node;
40070 /* Returns the canonical va_list type specified by TYPE. If there
40071 is no valid TYPE provided, it returns NULL_TREE. */
40074 ix86_canonical_va_list_type (tree type)
40078 /* Resolve references and pointers to va_list type. */
40079 if (TREE_CODE (type) == MEM_REF)
40080 type = TREE_TYPE (type);
40081 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
40082 type = TREE_TYPE (type);
40083 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
40084 type = TREE_TYPE (type);
40086 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
40088 wtype = va_list_type_node;
40089 gcc_assert (wtype != NULL_TREE);
40090 htype = type;
40091 if (TREE_CODE (wtype) == ARRAY_TYPE)
40093 /* If va_list is an array type, the argument may have decayed
40094 to a pointer type, e.g. by being passed to another function.
40095 In that case, unwrap both types so that we can compare the
40096 underlying records. */
40097 if (TREE_CODE (htype) == ARRAY_TYPE
40098 || POINTER_TYPE_P (htype))
40100 wtype = TREE_TYPE (wtype);
40101 htype = TREE_TYPE (htype);
40104 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
40105 return va_list_type_node;
40106 wtype = sysv_va_list_type_node;
40107 gcc_assert (wtype != NULL_TREE);
40108 htype = type;
40109 if (TREE_CODE (wtype) == ARRAY_TYPE)
40111 /* If va_list is an array type, the argument may have decayed
40112 to a pointer type, e.g. by being passed to another function.
40113 In that case, unwrap both types so that we can compare the
40114 underlying records. */
40115 if (TREE_CODE (htype) == ARRAY_TYPE
40116 || POINTER_TYPE_P (htype))
40118 wtype = TREE_TYPE (wtype);
40119 htype = TREE_TYPE (htype);
40122 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
40123 return sysv_va_list_type_node;
40124 wtype = ms_va_list_type_node;
40125 gcc_assert (wtype != NULL_TREE);
40126 htype = type;
40127 if (TREE_CODE (wtype) == ARRAY_TYPE)
40129 /* If va_list is an array type, the argument may have decayed
40130 to a pointer type, e.g. by being passed to another function.
40131 In that case, unwrap both types so that we can compare the
40132 underlying records. */
40133 if (TREE_CODE (htype) == ARRAY_TYPE
40134 || POINTER_TYPE_P (htype))
40136 wtype = TREE_TYPE (wtype);
40137 htype = TREE_TYPE (htype);
40140 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
40141 return ms_va_list_type_node;
40144 return std_canonical_va_list_type (type);
40147 /* Iterate through the target-specific builtin types for va_list.
40148 IDX denotes the iterator, *PTREE is set to the result type of
40149 the va_list builtin, and *PNAME to its internal type.
40150 Returns zero if there is no element for this index, otherwise
40151 IDX should be increased upon the next call.
40152 Note, do not iterate a base builtin's name like __builtin_va_list.
40153 Used from c_common_nodes_and_builtins. */
40155 static int
40156 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
40165 case 0:
40166 *ptree = ms_va_list_type_node;
40167 *pname = "__builtin_ms_va_list";
40168 return 1;
40170 case 1:
40171 *ptree = sysv_va_list_type_node;
40172 *pname = "__builtin_sysv_va_list";
40173 return 1;
40180 #undef TARGET_SCHED_DISPATCH
40181 #define TARGET_SCHED_DISPATCH has_dispatch
40182 #undef TARGET_SCHED_DISPATCH_DO
40183 #define TARGET_SCHED_DISPATCH_DO do_dispatch
40184 #undef TARGET_SCHED_REASSOCIATION_WIDTH
40185 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
40186 #undef TARGET_SCHED_REORDER
40187 #define TARGET_SCHED_REORDER ix86_sched_reorder
40188 #undef TARGET_SCHED_ADJUST_PRIORITY
40189 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
40190 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
40191 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ix86_dependencies_evaluation_hook
40193 /* The size of the dispatch window is the total number of bytes of
40194 object code allowed in a window. */
40195 #define DISPATCH_WINDOW_SIZE 16
40197 /* Number of dispatch windows considered for scheduling. */
40198 #define MAX_DISPATCH_WINDOWS 3
40200 /* Maximum number of instructions in a window. */
40201 #define MAX_INSN 4
40203 /* Maximum number of immediate operands in a window. */
40204 #define MAX_IMM 4
40206 /* Maximum number of immediate bits allowed in a window. */
40207 #define MAX_IMM_SIZE 128
40209 /* Maximum number of 32 bit immediates allowed in a window. */
40210 #define MAX_IMM_32 4
40212 /* Maximum number of 64 bit immediates allowed in a window. */
40213 #define MAX_IMM_64 2
40215 /* Maximum total of loads or prefetches allowed in a window. */
40216 #define MAX_LOAD 2
40218 /* Maximum total of stores allowed in a window. */
40219 #define MAX_STORE 1
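/* Note (added commentary, not in the original sources): together these
   limits describe the dispatch hardware modeled here: at most
   MAX_DISPATCH_WINDOWS windows of DISPATCH_WINDOW_SIZE bytes each,
   which is where the 48-byte bound checked in process_end_window and
   fits_dispatch_window below comes from (3 * 16 = 48).  */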
40222 #define BIG 100

40225 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
40226 enum dispatch_group {
40227 disp_no_group = 0,
40228 disp_load,
40229 disp_store,
40230 disp_load_store,
40231 disp_prefetch,
40232 disp_imm,
40233 disp_imm_32,
40234 disp_imm_64,
40235 disp_branch,
40236 disp_cmp,
40237 disp_jcc,
40238 disp_last
40239 };
40241 /* Number of allowable groups in a dispatch window. It is an array
40242 indexed by dispatch_group enum. 100 is used as a big number,
40243 because the number of these kinds of operations does not have any
40244 effect in a dispatch window, but we need them for other reasons in
40245 the table. */
40246 static unsigned int num_allowable_groups[disp_last] = {
40247 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
40250 char group_name[disp_last + 1][16] = {
40251 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
40252 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
40253 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
40256 /* Instruction path. */
40257 enum insn_path
40258 no_path,
40259 path_single, /* Single micro op. */
40260 path_double, /* Double micro op. */
40261 path_multi, /* Instructions with more than 2 micro ops. */
40263 };
40265 /* sched_insn_info defines a window to the instructions scheduled in
40266 the basic block. It contains a pointer to the insn_info table and
40267 the instruction scheduled.
40269 Windows are allocated for each basic block and are linked
40270 together. */
40271 typedef struct sched_insn_info_s {
40272 rtx insn;
40273 enum dispatch_group group;
40274 enum insn_path path;
40275 int byte_len;
40276 int imm_bytes;
40277 } sched_insn_info;
40279 /* Linked list of dispatch windows. This is a two way list of
40280 dispatch windows of a basic block. It contains information about
40281 the number of uops in the window and the total number of
40282 instructions and of bytes in the object code for this dispatch
40283 window. */
40284 typedef struct dispatch_windows_s {
40285 int num_insn; /* Number of insn in the window. */
40286 int num_uops; /* Number of uops in the window. */
40287 int window_size; /* Number of bytes in the window. */
40288 int window_num; /* Window number, 0 or 1. */
40289 int num_imm; /* Number of immediates in an insn. */
40290 int num_imm_32; /* Number of 32 bit immediates in an insn. */
40291 int num_imm_64; /* Number of 64 bit immediates in an insn. */
40292 int imm_size; /* Total immediates in the window. */
40293 int num_loads; /* Total memory loads in the window. */
40294 int num_stores; /* Total memory stores in the window. */
40295 int violation; /* Violation exists in window. */
40296 sched_insn_info *window; /* Pointer to the window. */
40297 struct dispatch_windows_s *next;
40298 struct dispatch_windows_s *prev;
40299 } dispatch_windows;
40301 /* Immediate values used in an insn. */
40302 typedef struct imm_info_s
40304 int imm;
40305 int imm32;
40306 int imm64;
40307 } imm_info;
40309 static dispatch_windows *dispatch_window_list;
40310 static dispatch_windows *dispatch_window_list1;
40312 /* Get dispatch group of insn. */
40314 static enum dispatch_group
40315 get_mem_group (rtx insn)
40317 enum attr_memory memory;
40319 if (INSN_CODE (insn) < 0)
40320 return disp_no_group;
40321 memory = get_attr_memory (insn);
40322 if (memory == MEMORY_STORE)
40323 return disp_store;
40325 if (memory == MEMORY_LOAD)
40326 return disp_load;
40328 if (memory == MEMORY_BOTH)
40329 return disp_load_store;
40331 return disp_no_group;
40334 /* Return true if insn is a compare instruction. */
40336 static bool
40337 is_cmp (rtx insn)
40339 enum attr_type type;
40341 type = get_attr_type (insn);
40342 return (type == TYPE_TEST
40343 || type == TYPE_ICMP
40344 || type == TYPE_FCMP
40345 || GET_CODE (PATTERN (insn)) == COMPARE);
40348 /* Return true if a dispatch violation was encountered. */
40350 static bool
40351 dispatch_violation (void)
40353 if (dispatch_window_list->next)
40354 return dispatch_window_list->next->violation;
40355 return dispatch_window_list->violation;
40358 /* Return true if insn is a branch instruction. */
40360 static bool
40361 is_branch (rtx insn)
40363 return (CALL_P (insn) || JUMP_P (insn));
40366 /* Return true if insn is a prefetch instruction. */
40368 static bool
40369 is_prefetch (rtx insn)
40371 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
40374 /* This function initializes a dispatch window and the list container holding a
40375 pointer to the window. */
40377 static void
40378 init_window (int window_num)
40380 int i;
40381 dispatch_windows *new_list;
40383 if (window_num == 0)
40384 new_list = dispatch_window_list;
40386 new_list = dispatch_window_list1;
40388 new_list->num_insn = 0;
40389 new_list->num_uops = 0;
40390 new_list->window_size = 0;
40391 new_list->next = NULL;
40392 new_list->prev = NULL;
40393 new_list->window_num = window_num;
40394 new_list->num_imm = 0;
40395 new_list->num_imm_32 = 0;
40396 new_list->num_imm_64 = 0;
40397 new_list->imm_size = 0;
40398 new_list->num_loads = 0;
40399 new_list->num_stores = 0;
40400 new_list->violation = false;
40402 for (i = 0; i < MAX_INSN; i++)
40404 new_list->window[i].insn = NULL;
40405 new_list->window[i].group = disp_no_group;
40406 new_list->window[i].path = no_path;
40407 new_list->window[i].byte_len = 0;
40408 new_list->window[i].imm_bytes = 0;
40413 /* This function allocates and initializes a dispatch window and the
40414 list container holding a pointer to the window. */
40416 static dispatch_windows *
40417 allocate_window (void)
40419 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
40420 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
40422 return new_list;
40425 /* This routine initializes the dispatch scheduling information. It
40426 initiates building dispatch scheduler tables and constructs the
40427 first dispatch window. */
40429 static void
40430 init_dispatch_sched (void)
40432 /* Allocate a dispatch list and a window. */
40433 dispatch_window_list = allocate_window ();
40434 dispatch_window_list1 = allocate_window ();
40435 init_window (0);
40436 init_window (1);
40439 /* This function returns true if a branch is detected. End of a basic block
40440 does not have to be a branch, but here we assume only branches end a
40441 basic block. */
40443 static bool
40444 is_end_basic_block (enum dispatch_group group)
40446 return group == disp_branch;
40449 /* This function is called when the end of a window processing is reached. */
40451 static void
40452 process_end_window (void)
40454 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
40455 if (dispatch_window_list->next)
40457 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
40458 gcc_assert (dispatch_window_list->window_size
40459 + dispatch_window_list1->window_size <= 48);
40460 init_window (1);
40462 init_window (0);
40465 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
40466 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
40467 for 48 bytes of instructions. Note that these windows are not dispatch
40468 windows whose sizes are DISPATCH_WINDOW_SIZE. */
40470 static dispatch_windows *
40471 allocate_next_window (int window_num)
40473 if (window_num == 0)
40475 if (dispatch_window_list->next)
40476 init_window (1);
40477 init_window (0);
40478 return dispatch_window_list;
40481 dispatch_window_list->next = dispatch_window_list1;
40482 dispatch_window_list1->prev = dispatch_window_list;
40484 return dispatch_window_list1;
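/* Note (added commentary): callers alternate between the two statically
   allocated windows with "window_num = ~window_num & 1" (see
   add_to_dispatch_window below), so this routine only re-initializes
   and re-links the two lists built by init_dispatch_sched; it never
   grows the list beyond two windows.  */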
40487 /* Increment the number of immediate operands of an instruction. */
40489 static int
40490 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
40492 if (!*in_rtx)
40493 return 0;
40495 switch (GET_CODE (*in_rtx))
40497 case CONST:
40498 case SYMBOL_REF:
40499 case CONST_INT:
40500 (imm_values->imm)++;
40501 if (x86_64_immediate_operand (*in_rtx, SImode))
40502 (imm_values->imm32)++;
40503 else
40504 (imm_values->imm64)++;
40505 break;
40507 case CONST_DOUBLE:
40508 (imm_values->imm)++;
40509 (imm_values->imm64)++;
40510 break;
40512 case CODE_LABEL:
40513 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
40515 (imm_values->imm)++;
40516 (imm_values->imm32)++;
40518 break;
40520 default:
40521 break;
40524 return 0;
40527 /* Compute number of immediate operands of an instruction. */
40529 static void
40530 find_constant (rtx in_rtx, imm_info *imm_values)
40532 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
40533 (rtx_function) find_constant_1, (void *) imm_values);
40536 /* Return total size of immediate operands of an instruction along with number
40537 of corresponding immediate-operands. It initializes its parameters to zero
40538 before calling FIND_CONSTANT.
40539 INSN is the input instruction. IMM is the total of immediates.
40540 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
40541 bit immediates. */
40543 static int
40544 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
40546 imm_info imm_values = {0, 0, 0};
40548 find_constant (insn, &imm_values);
40549 *imm = imm_values.imm;
40550 *imm32 = imm_values.imm32;
40551 *imm64 = imm_values.imm64;
40552 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
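/* Example (added, illustrative): an insn with one 32-bit and one
   64-bit immediate yields *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a
   return value of 1*4 + 1*8 = 12 bytes of immediate data.  */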
40555 /* This function indicates if an operand of an instruction is an
40556 immediate. */
40558 static bool
40559 has_immediate (rtx insn)
40561 int num_imm_operand;
40562 int num_imm32_operand;
40563 int num_imm64_operand;
40565 if (insn)
40566 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
40567 &num_imm64_operand);
40569 return false;
40571 /* Return single or double path for instructions. */
40573 static enum insn_path
40574 get_insn_path (rtx insn)
40576 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
40578 if ((int)path == 0)
40579 return path_single;
40581 if ((int)path == 1)
40582 return path_double;
40584 return path_multi;
40587 /* Return insn dispatch group. */
40589 static enum dispatch_group
40590 get_insn_group (rtx insn)
40592 enum dispatch_group group = get_mem_group (insn);
40593 if (group)
40594 return group;
40596 if (is_branch (insn))
40597 return disp_branch;
40599 if (is_cmp (insn))
40600 return disp_cmp;
40602 if (has_immediate (insn))
40603 return disp_imm;
40605 if (is_prefetch (insn))
40606 return disp_prefetch;
40608 return disp_no_group;
40611 /* Count number of GROUP restricted instructions in a dispatch
40612 window WINDOW_LIST. */
40614 static int
40615 count_num_restricted (rtx insn, dispatch_windows *window_list)
40617 enum dispatch_group group = get_insn_group (insn);
40618 int imm_size;
40619 int num_imm_operand;
40620 int num_imm32_operand;
40621 int num_imm64_operand;
40623 if (group == disp_no_group)
40624 return 0;
40626 if (group == disp_imm)
40628 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
40629 &num_imm64_operand);
40630 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
40631 || num_imm_operand + window_list->num_imm > MAX_IMM
40632 || (num_imm32_operand > 0
40633 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
40634 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
40635 || (num_imm64_operand > 0
40636 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
40637 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
40638 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
40639 && num_imm64_operand > 0
40640 && ((window_list->num_imm_64 > 0
40641 && window_list->num_insn >= 2)
40642 || window_list->num_insn >= 3)))
40643 return BIG;
40645 return 1;
40648 if ((group == disp_load_store
40649 && (window_list->num_loads >= MAX_LOAD
40650 || window_list->num_stores >= MAX_STORE))
40651 || ((group == disp_load
40652 || group == disp_prefetch)
40653 && window_list->num_loads >= MAX_LOAD)
40654 || (group == disp_store
40655 && window_list->num_stores >= MAX_STORE))
40656 return BIG;
40658 return 1;
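/* Summary (added commentary): count_num_restricted reports a count
   larger than the allowance when adding INSN would overflow one of the
   window budgets: total immediate bytes (MAX_IMM_SIZE), immediate
   operands (MAX_IMM), 32-bit immediate slots (MAX_IMM_32, where a
   64-bit immediate consumes two slots), 64-bit immediates (MAX_IMM_64),
   loads/prefetches (MAX_LOAD) or stores (MAX_STORE).  */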
40661 /* This function returns true if insn satisfies dispatch rules on the
40662 last window scheduled. */
40664 static bool
40665 fits_dispatch_window (rtx insn)
40667 dispatch_windows *window_list = dispatch_window_list;
40668 dispatch_windows *window_list_next = dispatch_window_list->next;
40669 unsigned int num_restrict;
40670 enum dispatch_group group = get_insn_group (insn);
40671 enum insn_path path = get_insn_path (insn);
40672 int sum;
40674 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
40675 instructions should be given the lowest priority in the
40676 scheduling process in the Haifa scheduler to make sure they will be
40677 scheduled in the same dispatch window as the reference to them. */
40678 if (group == disp_jcc || group == disp_cmp)
40679 return false;
40681 /* Check nonrestricted. */
40682 if (group == disp_no_group || group == disp_branch)
40683 return true;
40685 /* Get last dispatch window. */
40686 if (window_list_next)
40687 window_list = window_list_next;
40689 if (window_list->window_num == 1)
40691 sum = window_list->prev->window_size + window_list->window_size;
40693 if (sum == 32
40694 || (min_insn_size (insn) + sum) >= 48)
40695 /* Window 1 is full. Go for next window. */
40696 return true;
40699 num_restrict = count_num_restricted (insn, window_list);
40701 if (num_restrict > num_allowable_groups[group])
40702 return false;
40704 /* See if it fits in the first window. */
40705 if (window_list->window_num == 0)
40707 /* The first window should have only single and double path
40708 uops. */
40709 if (path == path_double
40710 && (window_list->num_uops + 2) > MAX_INSN)
40711 return false;
40712 else if (path != path_single)
40713 return false;
40715 return true;
40718 /* Add an instruction INSN with NUM_UOPS micro-operations to the
40719 dispatch window WINDOW_LIST. */
40721 static void
40722 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
40724 int byte_len = min_insn_size (insn);
40725 int num_insn = window_list->num_insn;
40726 int imm_size;
40727 sched_insn_info *window = window_list->window;
40728 enum dispatch_group group = get_insn_group (insn);
40729 enum insn_path path = get_insn_path (insn);
40730 int num_imm_operand;
40731 int num_imm32_operand;
40732 int num_imm64_operand;
40734 if (!window_list->violation && group != disp_cmp
40735 && !fits_dispatch_window (insn))
40736 window_list->violation = true;
40738 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
40739 &num_imm64_operand);
40741 /* Initialize window with new instruction. */
40742 window[num_insn].insn = insn;
40743 window[num_insn].byte_len = byte_len;
40744 window[num_insn].group = group;
40745 window[num_insn].path = path;
40746 window[num_insn].imm_bytes = imm_size;
40748 window_list->window_size += byte_len;
40749 window_list->num_insn = num_insn + 1;
40750 window_list->num_uops = window_list->num_uops + num_uops;
40751 window_list->imm_size += imm_size;
40752 window_list->num_imm += num_imm_operand;
40753 window_list->num_imm_32 += num_imm32_operand;
40754 window_list->num_imm_64 += num_imm64_operand;
40756 if (group == disp_store)
40757 window_list->num_stores += 1;
40758 else if (group == disp_load
40759 || group == disp_prefetch)
40760 window_list->num_loads += 1;
40761 else if (group == disp_load_store)
40763 window_list->num_stores += 1;
40764 window_list->num_loads += 1;
40768 /* Adds a scheduled instruction, INSN, to the current dispatch window.
40769 If the total bytes of instructions or the number of instructions in
40770 the window exceeds the allowed maximum, it allocates a new window. */
40772 static void
40773 add_to_dispatch_window (rtx insn)
40775 int byte_len;
40776 dispatch_windows *window_list;
40777 dispatch_windows *next_list;
40778 dispatch_windows *window0_list;
40779 enum insn_path path;
40780 enum dispatch_group insn_group;
40781 bool insn_fits;
40782 int num_insn;
40783 int num_uops;
40784 int window_num;
40785 int insn_num_uops;
40786 int sum;
40788 if (INSN_CODE (insn) < 0)
40789 return;
40791 byte_len = min_insn_size (insn);
40792 window_list = dispatch_window_list;
40793 next_list = window_list->next;
40794 path = get_insn_path (insn);
40795 insn_group = get_insn_group (insn);
40797 /* Get the last dispatch window. */
40798 if (next_list)
40799 window_list = dispatch_window_list->next;
40801 if (path == path_single)
40802 insn_num_uops = 1;
40803 else if (path == path_double)
40804 insn_num_uops = 2;
40805 else
40806 insn_num_uops = (int) path;
40808 /* If current window is full, get a new window.
40809 Window number zero is full if MAX_INSN uops are scheduled in it.
40810 Window number one is full if window zero's bytes plus window
40811 one's bytes equal 32, or if adding the bytes of the new instruction
40812 to the total makes it greater than 48, or if it already has MAX_INSN
40813 instructions in it. */
40814 num_insn = window_list->num_insn;
40815 num_uops = window_list->num_uops;
40816 window_num = window_list->window_num;
40817 insn_fits = fits_dispatch_window (insn);
40819 if (num_insn >= MAX_INSN
40820 || num_uops + insn_num_uops > MAX_INSN
40821 || !(insn_fits))
40823 window_num = ~window_num & 1;
40824 window_list = allocate_next_window (window_num);
40827 if (window_num == 0)
40829 add_insn_window (insn, window_list, insn_num_uops);
40830 if (window_list->num_insn >= MAX_INSN
40831 && insn_group == disp_branch)
40833 process_end_window ();
40834 return;
40837 else if (window_num == 1)
40839 window0_list = window_list->prev;
40840 sum = window0_list->window_size + window_list->window_size;
40841 if (sum == 32
40842 || (byte_len + sum) >= 48)
40844 process_end_window ();
40845 window_list = dispatch_window_list;
40848 add_insn_window (insn, window_list, insn_num_uops);
40850 else
40851 gcc_unreachable ();
40853 if (is_end_basic_block (insn_group))
40855 /* End of basic block is reached; do end-basic-block processing. */
40856 process_end_window ();
40861 /* Print the dispatch window, WINDOW_NUM, to FILE. */
40863 DEBUG_FUNCTION static void
40864 debug_dispatch_window_file (FILE *file, int window_num)
40866 dispatch_windows *list;
40867 int i;
40869 if (window_num == 0)
40870 list = dispatch_window_list;
40872 list = dispatch_window_list1;
40874 fprintf (file, "Window #%d:\n", list->window_num);
40875 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
40876 list->num_insn, list->num_uops, list->window_size);
40877 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
40878 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
40880 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
40881 list->num_stores);
40882 fprintf (file, " insn info:\n");
40884 for (i = 0; i < MAX_INSN; i++)
40886 if (!list->window[i].insn)
40887 break;
40888 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
40889 i, group_name[list->window[i].group],
40890 i, (void *)list->window[i].insn,
40891 i, list->window[i].path,
40892 i, list->window[i].byte_len,
40893 i, list->window[i].imm_bytes);
40897 /* Print to stdout a dispatch window. */
40899 DEBUG_FUNCTION void
40900 debug_dispatch_window (int window_num)
40902 debug_dispatch_window_file (stdout, window_num);
40905 /* Print INSN dispatch information to FILE. */
40907 DEBUG_FUNCTION static void
40908 debug_insn_dispatch_info_file (FILE *file, rtx insn)
40910 int byte_len;
40911 enum insn_path path;
40912 enum dispatch_group group;
40913 int imm_size;
40914 int num_imm_operand;
40915 int num_imm32_operand;
40916 int num_imm64_operand;
40918 if (INSN_CODE (insn) < 0)
40919 return;
40921 byte_len = min_insn_size (insn);
40922 path = get_insn_path (insn);
40923 group = get_insn_group (insn);
40924 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
40925 &num_imm64_operand);
40927 fprintf (file, " insn info:\n");
40928 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
40929 group_name[group], path, byte_len);
40930 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
40931 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
40934 /* Print to stdout the status of the ready list with respect to
40935 dispatch windows. */
40937 DEBUG_FUNCTION void
40938 debug_ready_dispatch (void)
40940 int i;
40941 int no_ready = number_in_ready ();
40943 fprintf (stdout, "Number of ready: %d\n", no_ready);
40945 for (i = 0; i < no_ready; i++)
40946 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
40949 /* This routine is the driver of the dispatch scheduler. */
40951 static void
40952 do_dispatch (rtx insn, int mode)
40954 if (mode == DISPATCH_INIT)
40955 init_dispatch_sched ();
40956 else if (mode == ADD_TO_DISPATCH_WINDOW)
40957 add_to_dispatch_window (insn);
40960 /* Return TRUE if Dispatch Scheduling is supported. */
40962 static bool
40963 has_dispatch (rtx insn, int action)
40965 if ((TARGET_BDVER1 || TARGET_BDVER2)
40966 && flag_dispatch_scheduler)
40967 switch (action)
40969 default:
40970 return false;
40972 case IS_DISPATCH_ON:
40973 return true;
40976 case IS_CMP:
40977 return is_cmp (insn);
40979 case DISPATCH_VIOLATION:
40980 return dispatch_violation ();
40982 case FITS_DISPATCH_WINDOW:
40983 return fits_dispatch_window (insn);
40986 return false;
40989 /* Implementation of reassociation_width target hook used by
40990 reassoc phase to identify parallelism level in reassociated
40991 tree. The statement's tree_code is passed in OPC. The arguments' type
40992 is passed in MODE.
40994 Currently parallel reassociation is enabled for Atom
40995 processors only and we set reassociation width to be 2
40996 because Atom may issue up to 2 instructions per cycle.
40998 Return value should be fixed if parallel reassociation is
40999 enabled for other processors. */
41001 static int
41002 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
41003 enum machine_mode mode)
41005 int res = 1;
41007 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
41008 res = 2;
41009 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
41010 res = 2;
41012 return res;
41015 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
41016 place emms and femms instructions. */
41018 static enum machine_mode
41019 ix86_preferred_simd_mode (enum machine_mode mode)
41021 if (!TARGET_SSE)
41022 return word_mode;
41024 switch (mode)
41026 case QImode:
41027 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
41028 case HImode:
41029 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
41030 case SImode:
41031 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
41032 case DImode:
41033 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
41035 case SFmode:
41036 if (TARGET_AVX && !TARGET_PREFER_AVX128)
41037 return V8SFmode;
41038 else
41039 return V4SFmode;
41041 case DFmode:
41042 if (!TARGET_VECTORIZE_DOUBLE)
41043 return word_mode;
41044 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
41045 return V4DFmode;
41046 else if (TARGET_SSE2)
41047 return V2DFmode;
41049 default:
41050 return word_mode;
41055 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
41056 vectors. */
41058 static unsigned int
41059 ix86_autovectorize_vector_sizes (void)
41061 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
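/* Note (added commentary): the hook's return value is a bitmask of
   candidate vector sizes in bytes, so 32 | 16 makes the vectorizer try
   256-bit vectors first and fall back to 128-bit ones; returning 0
   keeps only the preferred SIMD mode chosen above.  */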
41066 /* Return class of registers which could be used for pseudo of MODE
41067 and of class RCLASS for spilling instead of memory. Return NO_REGS
41068 if it is not possible or not profitable. */
41069 static reg_class_t
41070 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
41072 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
41073 && hard_reg_set_subset_p (reg_class_contents[rclass],
41074 reg_class_contents[GENERAL_REGS])
41075 && (mode == SImode || (TARGET_64BIT && mode == DImode)))
41076 return SSE_REGS;
41077 return NO_REGS;
41080 /* Implement targetm.vectorize.init_cost. */
41082 static void *
41083 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
41085 unsigned *cost = XNEWVEC (unsigned, 3);
41086 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
41087 return cost;
41090 /* Implement targetm.vectorize.add_stmt_cost. */
41092 static unsigned
41093 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
41094 struct _stmt_vec_info *stmt_info, int misalign,
41095 enum vect_cost_model_location where)
41097 unsigned *cost = (unsigned *) data;
41098 unsigned retval = 0;
41100 if (flag_vect_cost_model)
41102 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
41103 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
41105 /* Statements in an inner loop relative to the loop being
41106 vectorized are weighted more heavily. The value here is
41107 arbitrary and could potentially be improved with analysis. */
41108 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
41109 count *= 50; /* FIXME. */
41111 retval = (unsigned) (count * stmt_cost);
41112 cost[where] += retval;
41115 return retval;
41118 /* Implement targetm.vectorize.finish_cost. */
41120 static void
41121 ix86_finish_cost (void *data, unsigned *prologue_cost,
41122 unsigned *body_cost, unsigned *epilogue_cost)
41124 unsigned *cost = (unsigned *) data;
41125 *prologue_cost = cost[vect_prologue];
41126 *body_cost = cost[vect_body];
41127 *epilogue_cost = cost[vect_epilogue];
41130 /* Implement targetm.vectorize.destroy_cost_data. */
41132 static void
41133 ix86_destroy_cost_data (void *data)
41135 free (data);
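/* Note (added commentary): the vectorizer drives these hooks in the
   order init_cost -> add_stmt_cost (repeatedly) -> finish_cost ->
   destroy_cost_data, so the unsigned[3] accumulator allocated in
   ix86_init_cost lives for exactly one loop or basic-block analysis.  */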
41138 /* Validate target specific memory model bits in VAL. */
41140 static unsigned HOST_WIDE_INT
41141 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
41143 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
41144 unsigned HOST_WIDE_INT strong;
41146 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
41147 |MEMMODEL_MASK)
41148 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
41150 warning (OPT_Winvalid_memory_model,
41151 "Unknown architecture specific memory model");
41152 return MEMMODEL_SEQ_CST;
41154 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
41155 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
41157 warning (OPT_Winvalid_memory_model,
41158 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
41159 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
41161 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
41163 warning (OPT_Winvalid_memory_model,
41164 "HLE_RELEASE not used with RELEASE or stronger memory model");
41165 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
41167 return val;
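/* Example (added, illustrative): for
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
   MODEL is MEMMODEL_RELEASE, the HLE_RELEASE bit is accepted, and VAL
   is returned unchanged.  Combining __ATOMIC_HLE_RELEASE with a weaker
   model such as __ATOMIC_RELAXED would instead hit the warning above
   and fall back to MEMMODEL_SEQ_CST | IX86_HLE_RELEASE.  */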
41170 /* Initialize the GCC target structure. */
41171 #undef TARGET_RETURN_IN_MEMORY
41172 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
41174 #undef TARGET_LEGITIMIZE_ADDRESS
41175 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
41177 #undef TARGET_ATTRIBUTE_TABLE
41178 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
41179 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
41180 # undef TARGET_MERGE_DECL_ATTRIBUTES
41181 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
41184 #undef TARGET_COMP_TYPE_ATTRIBUTES
41185 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
41187 #undef TARGET_INIT_BUILTINS
41188 #define TARGET_INIT_BUILTINS ix86_init_builtins
41189 #undef TARGET_BUILTIN_DECL
41190 #define TARGET_BUILTIN_DECL ix86_builtin_decl
41191 #undef TARGET_EXPAND_BUILTIN
41192 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
41194 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
41195 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
41196 ix86_builtin_vectorized_function
41198 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
41199 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
41201 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
41202 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
41204 #undef TARGET_VECTORIZE_BUILTIN_GATHER
41205 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
41207 #undef TARGET_BUILTIN_RECIPROCAL
41208 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
41210 #undef TARGET_ASM_FUNCTION_EPILOGUE
41211 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
41213 #undef TARGET_ENCODE_SECTION_INFO
41214 #ifndef SUBTARGET_ENCODE_SECTION_INFO
41215 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
41217 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
41220 #undef TARGET_ASM_OPEN_PAREN
41221 #define TARGET_ASM_OPEN_PAREN ""
41222 #undef TARGET_ASM_CLOSE_PAREN
41223 #define TARGET_ASM_CLOSE_PAREN ""
41225 #undef TARGET_ASM_BYTE_OP
41226 #define TARGET_ASM_BYTE_OP ASM_BYTE
41228 #undef TARGET_ASM_ALIGNED_HI_OP
41229 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
41230 #undef TARGET_ASM_ALIGNED_SI_OP
41231 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
41233 #undef TARGET_ASM_ALIGNED_DI_OP
41234 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
41237 #undef TARGET_PROFILE_BEFORE_PROLOGUE
41238 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
41240 #undef TARGET_ASM_UNALIGNED_HI_OP
41241 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
41242 #undef TARGET_ASM_UNALIGNED_SI_OP
41243 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
41244 #undef TARGET_ASM_UNALIGNED_DI_OP
41245 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
41247 #undef TARGET_PRINT_OPERAND
41248 #define TARGET_PRINT_OPERAND ix86_print_operand
41249 #undef TARGET_PRINT_OPERAND_ADDRESS
41250 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
41251 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
41252 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
41253 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
41254 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
41256 #undef TARGET_SCHED_INIT_GLOBAL
41257 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
41258 #undef TARGET_SCHED_ADJUST_COST
41259 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
41260 #undef TARGET_SCHED_ISSUE_RATE
41261 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
41262 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
41263 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
41264 ia32_multipass_dfa_lookahead
41266 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
41267 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
41269 #undef TARGET_MEMMODEL_CHECK
41270 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
41273 #undef TARGET_HAVE_TLS
41274 #define TARGET_HAVE_TLS true
41276 #undef TARGET_CANNOT_FORCE_CONST_MEM
41277 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
41278 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
41279 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
41281 #undef TARGET_DELEGITIMIZE_ADDRESS
41282 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
41284 #undef TARGET_MS_BITFIELD_LAYOUT_P
41285 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
41288 #undef TARGET_BINDS_LOCAL_P
41289 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
41291 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
41292 #undef TARGET_BINDS_LOCAL_P
41293 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
41296 #undef TARGET_ASM_OUTPUT_MI_THUNK
41297 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
41298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
41299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
41301 #undef TARGET_ASM_FILE_START
41302 #define TARGET_ASM_FILE_START x86_file_start
41304 #undef TARGET_OPTION_OVERRIDE
41305 #define TARGET_OPTION_OVERRIDE ix86_option_override
41307 #undef TARGET_REGISTER_MOVE_COST
41308 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
41309 #undef TARGET_MEMORY_MOVE_COST
41310 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
41311 #undef TARGET_RTX_COSTS
41312 #define TARGET_RTX_COSTS ix86_rtx_costs
41313 #undef TARGET_ADDRESS_COST
41314 #define TARGET_ADDRESS_COST ix86_address_cost
41316 #undef TARGET_FIXED_CONDITION_CODE_REGS
41317 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
41318 #undef TARGET_CC_MODES_COMPATIBLE
41319 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
41321 #undef TARGET_MACHINE_DEPENDENT_REORG
41322 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
41324 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
41325 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
41327 #undef TARGET_BUILD_BUILTIN_VA_LIST
41328 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
41330 #undef TARGET_FOLD_BUILTIN
41331 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
41333 #undef TARGET_ENUM_VA_LIST_P
41334 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
41336 #undef TARGET_FN_ABI_VA_LIST
41337 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
41339 #undef TARGET_CANONICAL_VA_LIST_TYPE
41340 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
41342 #undef TARGET_EXPAND_BUILTIN_VA_START
41343 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
41345 #undef TARGET_MD_ASM_CLOBBERS
41346 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
41348 #undef TARGET_PROMOTE_PROTOTYPES
41349 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
41350 #undef TARGET_STRUCT_VALUE_RTX
41351 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
41352 #undef TARGET_SETUP_INCOMING_VARARGS
41353 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
41354 #undef TARGET_MUST_PASS_IN_STACK
41355 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
41356 #undef TARGET_FUNCTION_ARG_ADVANCE
41357 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
41358 #undef TARGET_FUNCTION_ARG
41359 #define TARGET_FUNCTION_ARG ix86_function_arg
41360 #undef TARGET_FUNCTION_ARG_BOUNDARY
41361 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
41362 #undef TARGET_PASS_BY_REFERENCE
41363 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
41364 #undef TARGET_INTERNAL_ARG_POINTER
41365 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
41366 #undef TARGET_UPDATE_STACK_BOUNDARY
41367 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
41368 #undef TARGET_GET_DRAP_RTX
41369 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
41370 #undef TARGET_STRICT_ARGUMENT_NAMING
41371 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
41372 #undef TARGET_STATIC_CHAIN
41373 #define TARGET_STATIC_CHAIN ix86_static_chain
41374 #undef TARGET_TRAMPOLINE_INIT
41375 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
41376 #undef TARGET_RETURN_POPS_ARGS
41377 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
41379 #undef TARGET_LEGITIMATE_COMBINED_INSN
41380 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
41382 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
41383 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
41385 #undef TARGET_SCALAR_MODE_SUPPORTED_P
41386 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
41388 #undef TARGET_VECTOR_MODE_SUPPORTED_P
41389 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
41391 #undef TARGET_C_MODE_FOR_SUFFIX
41392 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
41395 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
41396 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
41399 #ifdef SUBTARGET_INSERT_ATTRIBUTES
41400 #undef TARGET_INSERT_ATTRIBUTES
41401 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
41404 #undef TARGET_MANGLE_TYPE
41405 #define TARGET_MANGLE_TYPE ix86_mangle_type
41408 #undef TARGET_STACK_PROTECT_FAIL
41409 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
41412 #undef TARGET_FUNCTION_VALUE
41413 #define TARGET_FUNCTION_VALUE ix86_function_value
41415 #undef TARGET_FUNCTION_VALUE_REGNO_P
41416 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
41418 #undef TARGET_PROMOTE_FUNCTION_MODE
41419 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
41421 #undef TARGET_MEMBER_TYPE_FORCES_BLK
41422 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
41424 #undef TARGET_SECONDARY_RELOAD
41425 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
41427 #undef TARGET_CLASS_MAX_NREGS
41428 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
41430 #undef TARGET_PREFERRED_RELOAD_CLASS
41431 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
41432 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
41433 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
41434 #undef TARGET_CLASS_LIKELY_SPILLED_P
41435 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
41437 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
41438 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
41439 ix86_builtin_vectorization_cost
41440 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
41441 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
41442 ix86_vectorize_vec_perm_const_ok
41443 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
41444 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
41445 ix86_preferred_simd_mode
41446 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
41447 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
41448 ix86_autovectorize_vector_sizes
41449 #undef TARGET_VECTORIZE_INIT_COST
41450 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
41451 #undef TARGET_VECTORIZE_ADD_STMT_COST
41452 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
41453 #undef TARGET_VECTORIZE_FINISH_COST
41454 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
41455 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
41456 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
41458 #undef TARGET_SET_CURRENT_FUNCTION
41459 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
41461 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
41462 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
41464 #undef TARGET_OPTION_SAVE
41465 #define TARGET_OPTION_SAVE ix86_function_specific_save
41467 #undef TARGET_OPTION_RESTORE
41468 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
41470 #undef TARGET_OPTION_PRINT
41471 #define TARGET_OPTION_PRINT ix86_function_specific_print
41473 #undef TARGET_CAN_INLINE_P
41474 #define TARGET_CAN_INLINE_P ix86_can_inline_p
41476 #undef TARGET_EXPAND_TO_RTL_HOOK
41477 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
41479 #undef TARGET_LEGITIMATE_ADDRESS_P
41480 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
41482 #undef TARGET_LRA_P
41483 #define TARGET_LRA_P ix86_lra_p
41485 #undef TARGET_REGISTER_PRIORITY
41486 #define TARGET_REGISTER_PRIORITY ix86_register_priority
41488 #undef TARGET_LEGITIMATE_CONSTANT_P
41489 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
41491 #undef TARGET_FRAME_POINTER_REQUIRED
41492 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
41494 #undef TARGET_CAN_ELIMINATE
41495 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
41497 #undef TARGET_EXTRA_LIVE_ON_ENTRY
41498 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
41500 #undef TARGET_ASM_CODE_END
41501 #define TARGET_ASM_CODE_END ix86_code_end
41503 #undef TARGET_CONDITIONAL_REGISTER_USAGE
41504 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
41507 #undef TARGET_INIT_LIBFUNCS
41508 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
41511 #undef TARGET_SPILL_CLASS
41512 #define TARGET_SPILL_CLASS ix86_spill_class
41514 struct gcc_target targetm = TARGET_INITIALIZER;
41516 #include "gt-i386.h"