/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "expr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "df.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
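
/* Editor's illustration, not part of the original source: the merge rule
   that move_or_delete_vzeroupper_1 below applies over predecessor exit
   states, written out as a standalone helper.  Any `used' predecessor
   forces `used'; otherwise any `unknown' predecessor leaves the result
   `unknown' (when unknown is not being treated as unused).  */

static inline enum upper_128bits_state
merge_upper_128bits_state (enum upper_128bits_state a,
			   enum upper_128bits_state b)
{
  if (a == used || b == used)
    return used;
  if (a == unknown || b == unknown)
    return unknown;
  return unused;
}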

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee neither returns nor passes a 256bit AVX register, or there
     is no 256bit AVX register in the function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
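
/* Editor's note, not original text: check_avx256_stores is a callback
   for note_stores, which invokes it on every SET and CLOBBER destination
   in an insn pattern.  A typical invocation (see the scan loop below):

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   After the call, STATE is `used' iff some store referenced a 256bit
   AVX register.  */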

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  /* FALLTHRU */
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
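
/* Editor's illustration, not part of GCC: a sketch of what this pass
   acts on.  For code like

     #include <immintrin.h>
     extern void bar (void);
     extern __m256 a, b, c;

     void
     foo (void)
     {
       c = _mm256_add_ps (a, b);  // upper 128bits become live
       bar ();                    // the vzeroupper emitted here survives
                                  // and is moved to just before the call
       bar ();                    // upper 128bits already clear: a second
                                  // vzeroupper here is deleted as redundant
     }

   the pass tracks the unknown/unused/used state across the CFG and keeps
   only the vzeroupper insns that actually clear live upper halves.  */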

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
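
/* Editor's illustration, not part of the original source: how MODE_INDEX
   selects the per-mode entry of the [QI, HI, SI, DI, other] cost arrays
   in the tables below, assuming the mult_init[] field of struct
   processor_costs declared in i386.h.  */

static inline int
example_mult_init_cost (const struct processor_costs *cost,
			enum machine_mode mode)
{
  /* mult_init[] is indexed 0..4 exactly as MODE_INDEX maps modes.  */
  return cost->mult_init[MODE_INDEX (mode)];
}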

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
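
/* Editor's worked example: under the convention above, COSTS_N_INSNS (1)
   == 4 models one add, and COSTS_N_BYTES (2) == 4 models the same add as
   two bytes of code, so size-table entries stay directly comparable with
   the speed tables: COSTS_N_BYTES (3) == 6 charges a 3-byte instruction
   1.5 times an add.  */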

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
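
/* Editor's note, not original text: each stringop_algs initializer below
   reads {alg_for_unknown_size, {{max_size, alg}, ...}}, the list being
   terminated by a max_size of -1.  Each cost table carries one descriptor
   per {memcpy, memset} x {32bit, 64bit} combination; DUMMY_STRINGOP_ALGS
   fills the 64bit slot on CPUs never tuned for 64bit.  For example,
   pentium_cost below says: memcpy of up to 256 bytes uses rep movsl,
   anything larger or of unknown size goes to the library call.  */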

static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),		/* cost of an add instruction */
  COSTS_N_BYTES (3),		/* cost of a lea instruction */
  COSTS_N_BYTES (2),		/* variable shift costs */
  COSTS_N_BYTES (3),		/* constant shift costs */
  {COSTS_N_BYTES (3),		/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),		/*				 HI */
   COSTS_N_BYTES (3),		/*				 SI */
   COSTS_N_BYTES (3),		/*				 DI */
   COSTS_N_BYTES (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),		/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),		/*			    HI */
   COSTS_N_BYTES (3),		/*			    SI */
   COSTS_N_BYTES (3),		/*			    DI */
   COSTS_N_BYTES (5)},		/*			 other */
  COSTS_N_BYTES (3),		/* cost of movsx */
  COSTS_N_BYTES (3),		/* cost of movzx */
  0,				/* "large" insn */
  2,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 2},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {2, 2, 2},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  3,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {3, 3},			/* cost of storing MMX registers
				   in SImode and DImode */
  3,				/* cost of moving SSE register */
  {3, 3, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {3, 3, 3},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache */
  0,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_BYTES (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),		/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),		/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),		/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  1,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  1,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (6),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),		/*				 HI */
   COSTS_N_INSNS (6),		/*				 SI */
   COSTS_N_INSNS (6),		/*				 DI */
   COSTS_N_INSNS (6)},		/*			      other */
  COSTS_N_INSNS (1),		/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/*			    HI */
   COSTS_N_INSNS (23),		/*			    SI */
   COSTS_N_INSNS (23),		/*			    DI */
   COSTS_N_INSNS (23)},		/*			 other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache */
  0,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (23),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (12),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),		/*				 HI */
   COSTS_N_INSNS (12),		/*				 SI */
   COSTS_N_INSNS (12),		/*				 DI */
   COSTS_N_INSNS (12)},		/*			      other */
  1,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),		/*			    HI */
   COSTS_N_INSNS (40),		/*			    SI */
   COSTS_N_INSNS (40),		/*			    DI */
   COSTS_N_INSNS (40)},		/*			 other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  4,				/* size of l1 cache.  486 has 8kB cache
				   shared for code and data, so 4kB is
				   not really precise.  */
  4,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),		/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (11),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),		/*				 HI */
   COSTS_N_INSNS (11),		/*				 SI */
   COSTS_N_INSNS (11),		/*				 DI */
   COSTS_N_INSNS (11)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),		/*			    HI */
   COSTS_N_INSNS (25),		/*			    SI */
   COSTS_N_INSNS (25),		/*			    DI */
   COSTS_N_INSNS (25)},		/*			 other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  8,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (4),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (4)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),		/*			    HI */
   COSTS_N_INSNS (17),		/*			    SI */
   COSTS_N_INSNS (17),		/*			    DI */
   COSTS_N_INSNS (17)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (2),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (7),		/*				 SI */
   COSTS_N_INSNS (7),		/*				 DI */
   COSTS_N_INSNS (7)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/*			    HI */
   COSTS_N_INSNS (39),		/*			    SI */
   COSTS_N_INSNS (39),		/*			    DI */
   COSTS_N_INSNS (39)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  1,				/* cost for loading QImode using movzbl */
  {1, 1, 1},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {1, 1, 1},			/* cost of storing integer registers */
  1,				/* cost of reg,reg fld/fst */
  {1, 1, 1},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 6, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */

  1,				/* cost of moving MMX register */
  {1, 1},			/* cost of loading MMX registers
				   in SImode and DImode */
  {1, 1},			/* cost of storing MMX registers
				   in SImode and DImode */
  1,				/* cost of moving SSE register */
  {1, 1, 1},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {1, 1, 1},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  1,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  128,				/* size of l2 cache.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (3),		/*				 DI */
   COSTS_N_INSNS (3)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),		/*			    HI */
   COSTS_N_INSNS (18),		/*			    SI */
   COSTS_N_INSNS (18),		/*			    DI */
   COSTS_N_INSNS (18)},		/*			 other */
  COSTS_N_INSNS (2),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  32,				/* size of l2 cache.  Some models
				   have integrated l2 cache, but
				   optimizing for k6 is not important
				   enough to worry about that.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (5),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),		/*				 HI */
   COSTS_N_INSNS (5),		/*				 SI */
   COSTS_N_INSNS (5),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  5,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  5,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  3,				/* vec_unalign_load_cost.  */
  3,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  2,				/* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
						       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
						       1/1  1/1 */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (4),		/*				 SI */
   COSTS_N_INSNS (6),		/*				 DI */
   COSTS_N_INSNS (6)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {5, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {5, 5, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 4},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
						       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
						       1/1  1/1 */
  16,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),		/* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,				/* scalar_stmt_cost.  */
  4,				/* scalar load_cost.  */
  4,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  4,				/* vec_align_load_cost.  */
  4,				/* vec_unalign_load_cost.  */
  4,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (4),		/*				 SI */
   COSTS_N_INSNS (6),		/*				 DI */
   COSTS_N_INSNS (6)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {5, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {5, 5, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 4},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
						       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
						       1/1  1/1 */
  16,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),		/* cost of FSQRT instruction.  */

  /* BDVER2 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,				/* scalar_stmt_cost.  */
  4,				/* scalar load_cost.  */
  4,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  4,				/* vec_align_load_cost.  */
  4,				/* vec_unalign_load_cost.  */
  4,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
						       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
						       1/1  1/1 */
  32,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* BTVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (3),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (4),		/* constant shift costs */
  {COSTS_N_INSNS (15),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),		/*				 HI */
   COSTS_N_INSNS (15),		/*				 SI */
   COSTS_N_INSNS (15),		/*				 DI */
   COSTS_N_INSNS (15)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),		/*			    HI */
   COSTS_N_INSNS (56),		/*			    SI */
   COSTS_N_INSNS (56),		/*			    DI */
   COSTS_N_INSNS (56)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (5),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (10),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),		/*				 HI */
   COSTS_N_INSNS (10),		/*				 SI */
   COSTS_N_INSNS (10),		/*				 DI */
   COSTS_N_INSNS (10)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),		/*			    HI */
   COSTS_N_INSNS (66),		/*			    SI */
   COSTS_N_INSNS (66),		/*			    DI */
   COSTS_N_INSNS (66)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  3,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  6,				/* cost of moving MMX register */
  {12, 12},			/* cost of loading MMX registers
				   in SImode and DImode */
  {12, 12},			/* cost of storing MMX registers
				   in SImode and DImode */
  6,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {12, 12, 12},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  8,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  1024,				/* size of l2 cache.  */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (2)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1728 static const
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779 value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Core 2, Athlon and K8. */
1806 static const
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1876 const struct processor_costs *ix86_cost = &pentium_cost;
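/* Illustrative sketch, not part of GCC's logic: cost queries go through
   the ix86_cost pointer above, which ix86_option_override_internal later
   repoints at processor_target_table[ix86_tune].cost (or at
   ix86_size_cost when optimizing for size).  */

static int
example_current_branch_cost (void)
{
  /* E.g. 1 when tuning for Nocona, 3 when tuning for Atom (see the
     "Branch cost" rows in the tables above).  */
  return ix86_cost->branch_cost;
}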
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1912 /* Generic instruction choice should be a common subset of the supported
1913 CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
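/* Illustrative sketch, not part of GCC: how the m_* masks above are
   turned into per-feature booleans.  This mirrors the loops over
   initial_ix86_tune_features and initial_ix86_arch_features in
   ix86_option_override_internal further below.  */

static unsigned char
example_tune_feature_p (unsigned int feature_mask, enum processor_type tune)
{
  /* E.g. tune_mask == m_ATOM when tuning for PROCESSOR_ATOM.  */
  unsigned int tune_mask = 1u << tune;
  return !!(feature_mask & tune_mask);
}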
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code-size
1924 tradeoff.  We can't enable it for 32-bit generic because it does not
1925 work well with PPro-based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
1938 on simulation results, but after the P4 was made, no performance benefit
1939 was observed from branch hints, and they increase code size.  As a
1940 result, icc never generates branch hints. */
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well.  However,
1955 in the current implementation partial register stalls are not eliminated
1956 very well - they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee saving sequences.  Because this option
1958 pays back little on PPro-based chips and conflicts with the partial-register
1959 dependencies used by Athlon/P4-based chips, it is better to leave it off
1960 for generic32 for now. */
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1972 /* X86_TUNE_USE_MOV0 */
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1987 /* X86_TUNE_READ_MODIFY */
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1999 /* X86_TUNE_QIMODE_MATH */
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls were more effective. */
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2026 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4-based chips that treat 128bit
2039 SSE registers as single units and K8-based chips that divide SSE
2040 registers into two 64bit halves.  This knob promotes all store destinations
2041 to be 128bit to allow register renaming on 128bit SSE units, but usually
2042 results in one extra microop on 64bit SSE units.  Experimental results
2043 show that disabling this option on P4 brings over a 20% SPECfp regression,
2044 while enabling it on K8 brings a roughly 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just lower part of scalar values in proper format leaving the
2060 upper part undefined. */
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2078 /* X86_TUNE_SHIFT1 */
2081 /* X86_TUNE_USE_FFREEP */
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
2123 a vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on
2127 AMD machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3.  For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2174 /* Feature tests against the various architecture variations. */
2175 unsigned char ix86_arch_features[X86_ARCH_LAST];
2177 /* Feature tests against the various architecture variations, used to create
2178 ix86_arch_features based on the processor mask. */
2179 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2180 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2181 ~(m_386 | m_486 | m_PENT | m_K6),
2183 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2186 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2189 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2192 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2196 static const unsigned int x86_accumulate_outgoing_args
2197 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2199 static const unsigned int x86_arch_always_fancy_math_387
2200 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2202 static const unsigned int x86_avx256_split_unaligned_load
2203 = m_COREI7 | m_GENERIC;
2205 static const unsigned int x86_avx256_split_unaligned_store
2206 = m_COREI7 | m_BDVER | m_GENERIC;
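/* Illustrative sketch, an assumption rather than GCC's exact code: masks
   like the two above gate tuning-dependent behavior with the same mask
   test used for the tune feature arrays.  */

static int
example_split_unaligned_load_p (unsigned int tune_mask)
{
  return (x86_avx256_split_unaligned_load & tune_mask) != 0;
}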
2208 /* In case the average insn count for single function invocation is
2209 lower than this constant, emit fast (but longer) prologue and
2210 epilogue. */
2211 #define FAST_PROLOGUE_INSN_COUNT 20
2213 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2214 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2215 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2216 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2218 /* Array of the smallest class containing reg number REGNO, indexed by
2219 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2221 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2223 /* ax, dx, cx, bx */
2224 AREG, DREG, CREG, BREG,
2225 /* si, di, bp, sp */
2226 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2228 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2229 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2232 /* flags, fpsr, fpcr, frame */
2233 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2235 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2238 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2241 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2242 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2243 /* SSE REX registers */
2244 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2248 /* The "default" register map used in 32bit mode. */
2250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2257 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2258 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2261 /* The "default" register map used in 64bit mode. */
2263 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2265 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2266 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2268 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2269 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2270 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2271 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2274 /* Define the register numbers to be used in Dwarf debugging information.
2275 The SVR4 reference port C compiler uses the following register numbers
2276 in its Dwarf output code:
2277 0 for %eax (gcc regno = 0)
2278 1 for %ecx (gcc regno = 2)
2279 2 for %edx (gcc regno = 1)
2280 3 for %ebx (gcc regno = 3)
2281 4 for %esp (gcc regno = 7)
2282 5 for %ebp (gcc regno = 6)
2283 6 for %esi (gcc regno = 4)
2284 7 for %edi (gcc regno = 5)
2285 The following three DWARF register numbers are never generated by
2286 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2287 believes these numbers have these meanings.
2288 8 for %eip (no gcc equivalent)
2289 9 for %eflags (gcc regno = 17)
2290 10 for %trapno (no gcc equivalent)
2291 It is not at all clear how we should number the FP stack registers
2292 for the x86 architecture. If the version of SDB on x86/svr4 were
2293 a bit less brain dead with respect to floating-point then we would
2294 have a precedent to follow with respect to DWARF register numbers
2295 for x86 FP registers, but the SDB on x86/svr4 is so completely
2296 broken with respect to FP registers that it is hardly worth thinking
2297 of it as something to strive for compatibility with.
2298 The version of x86/svr4 SDB I have at the moment does (partially)
2299 seem to believe that DWARF register number 11 is associated with
2300 the x86 register %st(0), but that's about all. Higher DWARF
2301 register numbers don't seem to be associated with anything in
2302 particular, and even for DWARF regno 11, SDB only seems to under-
2303 stand that it should say that a variable lives in %st(0) (when
2304 asked via an `=' command) if we said it was in DWARF regno 11,
2305 but SDB still prints garbage when asked for the value of the
2306 variable in question (via a `/' command).
2307 (Also note that the labels SDB prints for various FP stack regs
2308 when doing an `x' command are all wrong.)
2309 Note that these problems generally don't affect the native SVR4
2310 C compiler because it doesn't allow the use of -O with -g and
2311 because when it is *not* optimizing, it allocates a memory
2312 location for each floating-point variable, and the memory
2313 location is what gets described in the DWARF AT_location
2314 attribute for the variable in question.
2315 Regardless of the severe mental illness of the x86/svr4 SDB, we
2316 do something sensible here and we use the following DWARF
2317 register numbers. Note that these are all stack-top-relative
2319 11 for %st(0) (gcc regno = 8)
2320 12 for %st(1) (gcc regno = 9)
2321 13 for %st(2) (gcc regno = 10)
2322 14 for %st(3) (gcc regno = 11)
2323 15 for %st(4) (gcc regno = 12)
2324 16 for %st(5) (gcc regno = 13)
2325 17 for %st(6) (gcc regno = 14)
2326 18 for %st(7) (gcc regno = 15)
2328 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2330 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2331 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2332 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2333 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2334 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2335 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2336 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
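/* Illustrative sketch, not part of GCC: translating a GCC hard register
   number to its SVR4 DWARF number through the table above.  Entries of
   -1 have no DWARF equivalent and must be rejected by the caller.  */

static int
example_svr4_dwarf_regno (int gcc_regno)
{
  /* E.g. example_svr4_dwarf_regno (1) == 2, since gcc regno 1 is %edx.  */
  return svr4_dbx_register_map[gcc_regno];
}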
2339 /* Define parameter passing and return registers. */
2341 static int const x86_64_int_parameter_registers[6] =
2343 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2346 static int const x86_64_ms_abi_int_parameter_registers[4] =
2348 CX_REG, DX_REG, R8_REG, R9_REG
2351 static int const x86_64_int_return_registers[4] =
2353 AX_REG, DX_REG, DI_REG, SI_REG
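/* Illustrative sketch, not part of GCC: SysV integer arguments are
   assigned to the registers above from left to right, so the first three
   integer arguments of f (a, b, c) land in %rdi, %rsi and %rdx.  This
   helper is a simplified assumption that ignores SSE classes and
   arguments that spill to the stack.  */

static int
example_sysv_int_arg_regno (int argno)
{
  return argno < 6 ? x86_64_int_parameter_registers[argno] : -1;
}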
2356 /* Define the structure for the machine field in struct function. */
2358 struct GTY(()) stack_local_entry {
2359 unsigned short mode;
2362 struct stack_local_entry *next;
2365 /* Structure describing stack frame layout.
2366 Stack grows downward.  Abridged layout, from higher to lower addresses:
2372 the saved static chain if ix86_static_chain_on_stack,
2374 the saved frame pointer if frame_pointer_needed <- HARD_FRAME_POINTER,
2375 the register and SSE register save areas <- sse_regs_save_offset,
2381 then the [va_arg registers] and [padding2] areas,
2384 which count toward to_allocate.  */
2397 int outgoing_arguments_size;
2398 HOST_WIDE_INT frame;
2400 /* The offsets relative to ARG_POINTER. */
2401 HOST_WIDE_INT frame_pointer_offset;
2402 HOST_WIDE_INT hard_frame_pointer_offset;
2403 HOST_WIDE_INT stack_pointer_offset;
2404 HOST_WIDE_INT hfp_save_offset;
2405 HOST_WIDE_INT reg_save_offset;
2406 HOST_WIDE_INT sse_reg_save_offset;
2408 /* When save_regs_using_mov is set, emit prologue using
2409 move instead of push instructions. */
2410 bool save_regs_using_mov;
2413 /* Which cpu are we scheduling for. */
2414 enum attr_cpu ix86_schedule;
2416 /* Which cpu are we optimizing for. */
2417 enum processor_type ix86_tune;
2419 /* Which instruction set architecture to use. */
2420 enum processor_type ix86_arch;
2422 /* True if the SSE prefetch instruction is not a NOP. */
2423 int x86_prefetch_sse;
2425 /* -mstackrealign option */
2426 static const char ix86_force_align_arg_pointer_string[]
2427 = "force_align_arg_pointer";
2429 static rtx (*ix86_gen_leave) (void);
2430 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2431 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2432 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2433 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2434 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2435 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2436 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2437 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2438 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2440 /* Preferred alignment for stack boundary in bits. */
2441 unsigned int ix86_preferred_stack_boundary;
2443 /* Alignment for incoming stack boundary in bits specified at
2444 command line. */
2445 static unsigned int ix86_user_incoming_stack_boundary;
2447 /* Default alignment for incoming stack boundary in bits. */
2448 static unsigned int ix86_default_incoming_stack_boundary;
2450 /* Alignment for incoming stack boundary in bits. */
2451 unsigned int ix86_incoming_stack_boundary;
2453 /* Calling abi specific va_list type nodes. */
2454 static GTY(()) tree sysv_va_list_type_node;
2455 static GTY(()) tree ms_va_list_type_node;
2457 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2458 char internal_label_prefix[16];
2459 int internal_label_prefix_len;
2461 /* Fence to use after loop using movnt. */
2464 /* Register class used for passing a given 64bit part of the argument.
2465 These represent classes as documented by the psABI, with the exception
2466 of the SSESF and SSEDF classes, which are basically SSE class; gcc just
2467 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2469 Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
2470 moves whenever possible (the upper half does contain padding). */
2471 enum x86_64_reg_class
2474 X86_64_INTEGER_CLASS,
2475 X86_64_INTEGERSI_CLASS,
2482 X86_64_COMPLEX_X87_CLASS,
2486 #define MAX_CLASSES 4
2488 /* Table of constants used by fldpi, fldln2, etc. */
2489 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2490 static bool ext_80387_constants_init = 0;
2493 static struct machine_function * ix86_init_machine_status (void);
2494 static rtx ix86_function_value (const_tree, const_tree, bool);
2495 static bool ix86_function_value_regno_p (const unsigned int);
2496 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2498 static rtx ix86_static_chain (const_tree, bool);
2499 static int ix86_function_regparm (const_tree, const_tree);
2500 static void ix86_compute_frame_layout (struct ix86_frame *);
2501 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2503 static void ix86_add_new_builtins (HOST_WIDE_INT);
2504 static rtx ix86_expand_vec_perm_builtin (tree);
2505 static tree ix86_canonical_va_list_type (tree);
2506 static void predict_jump (int);
2507 static unsigned int split_stack_prologue_scratch_regno (void);
2508 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2510 enum ix86_function_specific_strings
2512 IX86_FUNCTION_SPECIFIC_ARCH,
2513 IX86_FUNCTION_SPECIFIC_TUNE,
2514 IX86_FUNCTION_SPECIFIC_MAX
2517 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2518 const char *, enum fpmath_unit, bool);
2519 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2520 static void ix86_function_specific_save (struct cl_target_option *);
2521 static void ix86_function_specific_restore (struct cl_target_option *);
2522 static void ix86_function_specific_print (FILE *, int,
2523 struct cl_target_option *);
2524 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2525 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2526 struct gcc_options *);
2527 static bool ix86_can_inline_p (tree, tree);
2528 static void ix86_set_current_function (tree);
2529 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2531 static enum calling_abi ix86_function_abi (const_tree);
2534 #ifndef SUBTARGET32_DEFAULT_CPU
2535 #define SUBTARGET32_DEFAULT_CPU "i386"
2538 /* The svr4 ABI for the i386 says that records and unions are returned
2539 in memory. */
2540 #ifndef DEFAULT_PCC_STRUCT_RETURN
2541 #define DEFAULT_PCC_STRUCT_RETURN 1
2544 /* Whether -mtune= or -march= were specified */
2545 static int ix86_tune_defaulted;
2546 static int ix86_arch_specified;
2548 /* Vectorization library interface and handlers. */
2549 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2551 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2552 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2554 /* Processor target table, indexed by processor number */
2557 const struct processor_costs *cost; /* Processor costs */
2558 const int align_loop; /* Default alignments. */
2559 const int align_loop_max_skip;
2560 const int align_jump;
2561 const int align_jump_max_skip;
2562 const int align_func;
2565 static const struct ptt processor_target_table[PROCESSOR_max] =
2567 {&i386_cost, 4, 3, 4, 3, 4},
2568 {&i486_cost, 16, 15, 16, 15, 16},
2569 {&pentium_cost, 16, 7, 16, 7, 16},
2570 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2571 {&geode_cost, 0, 0, 0, 0, 0},
2572 {&k6_cost, 32, 7, 32, 7, 32},
2573 {&athlon_cost, 16, 7, 16, 7, 16},
2574 {&pentium4_cost, 0, 0, 0, 0, 0},
2575 {&k8_cost, 16, 7, 16, 7, 16},
2576 {&nocona_cost, 0, 0, 0, 0, 0},
2577 /* Core 2 32-bit. */
2578 {&generic32_cost, 16, 10, 16, 10, 16},
2579 /* Core 2 64-bit. */
2580 {&generic64_cost, 16, 10, 16, 10, 16},
2581 /* Core i7 32-bit. */
2582 {&generic32_cost, 16, 10, 16, 10, 16},
2583 /* Core i7 64-bit. */
2584 {&generic64_cost, 16, 10, 16, 10, 16},
2585 {&generic32_cost, 16, 7, 16, 7, 16},
2586 {&generic64_cost, 16, 10, 16, 10, 16},
2587 {&amdfam10_cost, 32, 24, 32, 7, 32},
2588 {&bdver1_cost, 32, 24, 32, 7, 32},
2589 {&bdver2_cost, 32, 24, 32, 7, 32},
2590 {&btver1_cost, 32, 24, 32, 7, 32},
2591 {&atom_cost, 16, 7, 16, 7, 16}
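/* Illustrative sketch, not part of GCC: the table above supplies the
   default align_* parameters, mirroring the align_loops/align_jumps
   handling in ix86_option_override_internal below.  E.g. the K6 entry
   {&k6_cost, 32, 7, 32, 7, 32} aligns loops to 32 bytes, skipping at
   most 7 padding bytes.  */

static int
example_default_loop_align (enum processor_type tune)
{
  return processor_target_table[tune].align_loop;
}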
2594 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2624 /* Return true if a red-zone is in use. */
2627 ix86_using_red_zone (void)
2629 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2632 /* Return a string that documents the current -m options. The caller is
2633 responsible for freeing the string. */
2636 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2637 const char *tune, enum fpmath_unit fpmath,
2640 struct ix86_target_opts
2642 const char *option; /* option string */
2643 HOST_WIDE_INT mask; /* isa mask options */
2646 /* This table is ordered so that options like -msse4.2 that imply
2647 preceding options are matched first. */
2648 static struct ix86_target_opts isa_opts[] =
2650 { "-m64", OPTION_MASK_ISA_64BIT },
2651 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2652 { "-mfma", OPTION_MASK_ISA_FMA },
2653 { "-mxop", OPTION_MASK_ISA_XOP },
2654 { "-mlwp", OPTION_MASK_ISA_LWP },
2655 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2656 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2657 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2658 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2659 { "-msse3", OPTION_MASK_ISA_SSE3 },
2660 { "-msse2", OPTION_MASK_ISA_SSE2 },
2661 { "-msse", OPTION_MASK_ISA_SSE },
2662 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2663 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2664 { "-mmmx", OPTION_MASK_ISA_MMX },
2665 { "-mabm", OPTION_MASK_ISA_ABM },
2666 { "-mbmi", OPTION_MASK_ISA_BMI },
2667 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2668 { "-mtbm", OPTION_MASK_ISA_TBM },
2669 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2670 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2671 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2672 { "-maes", OPTION_MASK_ISA_AES },
2673 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2674 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2675 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2676 { "-mf16c", OPTION_MASK_ISA_F16C },
2680 static struct ix86_target_opts flag_opts[] =
2682 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2683 { "-m80387", MASK_80387 },
2684 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2685 { "-malign-double", MASK_ALIGN_DOUBLE },
2686 { "-mcld", MASK_CLD },
2687 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2688 { "-mieee-fp", MASK_IEEE_FP },
2689 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2690 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2691 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2692 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2693 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2694 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2695 { "-mno-red-zone", MASK_NO_RED_ZONE },
2696 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2697 { "-mrecip", MASK_RECIP },
2698 { "-mrtd", MASK_RTD },
2699 { "-msseregparm", MASK_SSEREGPARM },
2700 { "-mstack-arg-probe", MASK_STACK_PROBE },
2701 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2702 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2703 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2704 { "-mvzeroupper", MASK_VZEROUPPER },
2705 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2706 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2707 { "-mprefer-avx128", MASK_PREFER_AVX128},
2710 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2713 char target_other[40];
2722 memset (opts, '\0', sizeof (opts));
2724 /* Add -march= option. */
2727 opts[num][0] = "-march=";
2728 opts[num++][1] = arch;
2731 /* Add -mtune= option. */
2734 opts[num][0] = "-mtune=";
2735 opts[num++][1] = tune;
2738 /* Pick out the options in isa options. */
2739 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2741 if ((isa & isa_opts[i].mask) != 0)
2743 opts[num++][0] = isa_opts[i].option;
2744 isa &= ~ isa_opts[i].mask;
2748 if (isa && add_nl_p)
2750 opts[num++][0] = isa_other;
2751 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2755 /* Add flag options. */
2756 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2758 if ((flags & flag_opts[i].mask) != 0)
2760 opts[num++][0] = flag_opts[i].option;
2761 flags &= ~ flag_opts[i].mask;
2765 if (flags && add_nl_p)
2767 opts[num++][0] = target_other;
2768 sprintf (target_other, "(other flags: %#x)", flags);
2771 /* Add -fpmath= option. */
2774 opts[num][0] = "-mfpmath=";
2775 switch ((int) fpmath)
2778 opts[num++][1] = "387";
2782 opts[num++][1] = "sse";
2785 case FPMATH_387 | FPMATH_SSE:
2786 opts[num++][1] = "sse+387";
2798 gcc_assert (num < ARRAY_SIZE (opts));
2800 /* Size the string. */
2802 sep_len = (add_nl_p) ? 3 : 1;
2803 for (i = 0; i < num; i++)
2806 for (j = 0; j < 2; j++)
2808 len += strlen (opts[i][j]);
2811 /* Build the string. */
2812 ret = ptr = (char *) xmalloc (len);
2815 for (i = 0; i < num; i++)
2819 for (j = 0; j < 2; j++)
2820 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2827 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2835 for (j = 0; j < 2; j++)
2838 memcpy (ptr, opts[i][j], len2[j]);
2840 line_len += len2[j];
2845 gcc_assert (ret + len >= ptr);
2850 /* Return true if profiling code should be emitted before the prologue,
2851 false otherwise.  On x86 this is the case when -mfentry is in use,
2852 since __fentry__ ("hotfix"-style patching) is called before the prologue. */
2854 ix86_profile_before_prologue (void)
2856 return flag_fentry != 0;
2859 /* Function that is callable from the debugger to print the current
2860 options. */
2862 ix86_debug_options (void)
2864 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2865 ix86_arch_string, ix86_tune_string,
2870 fprintf (stderr, "%s\n\n", opts);
2874 fputs ("<no options>\n\n", stderr);
2879 /* Override various settings based on options. If MAIN_ARGS_P, the
2880 options are from the command line, otherwise they are from
2881 attributes. */
2884 ix86_option_override_internal (bool main_args_p)
2887 unsigned int ix86_arch_mask, ix86_tune_mask;
2888 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2893 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2894 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2895 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2896 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2897 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2898 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2899 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2900 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2901 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2902 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2903 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2904 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2905 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2906 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2907 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2908 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2909 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2910 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2911 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2912 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2913 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2914 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2915 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2916 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2917 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2918 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2919 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2920 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2921 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2922 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2923 /* If this reaches 64, we need to widen the struct pta flags field below. */
2927 const char *const name; /* processor name or nickname. */
2928 const enum processor_type processor;
2929 const enum attr_cpu schedule;
2930 const unsigned HOST_WIDE_INT flags;
2932 const processor_alias_table[] =
2934 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2935 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2936 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2937 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2938 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2939 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2940 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2941 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2942 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2943 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2944 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2945 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2946 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2948 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2950 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2951 PTA_MMX | PTA_SSE | PTA_SSE2},
2952 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2953 PTA_MMX | PTA_SSE | PTA_SSE2},
2954 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2955 PTA_MMX | PTA_SSE | PTA_SSE2},
2956 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2957 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2958 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2959 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2960 | PTA_CX16 | PTA_NO_SAHF},
2961 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2962 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2963 | PTA_SSSE3 | PTA_CX16},
2964 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2965 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2966 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2967 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2968 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2969 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2970 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2971 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2972 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2973 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2974 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2975 | PTA_RDRND | PTA_F16C},
2976 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2977 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2978 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2979 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2980 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2981 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2982 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2983 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2984 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2985 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2986 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2987 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2988 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2989 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2990 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2991 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2992 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2993 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2994 {"x86-64", PROCESSOR_K8, CPU_K8,
2995 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2996 {"k8", PROCESSOR_K8, CPU_K8,
2997 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2998 | PTA_SSE2 | PTA_NO_SAHF},
2999 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3000 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3001 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3002 {"opteron", PROCESSOR_K8, CPU_K8,
3003 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3004 | PTA_SSE2 | PTA_NO_SAHF},
3005 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3006 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3007 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3008 {"athlon64", PROCESSOR_K8, CPU_K8,
3009 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3010 | PTA_SSE2 | PTA_NO_SAHF},
3011 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3012 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3013 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3014 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3015 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3016 | PTA_SSE2 | PTA_NO_SAHF},
3017 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3018 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3019 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3020 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3021 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3022 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3023 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3024 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3025 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3026 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3027 | PTA_XOP | PTA_LWP},
3028 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3029 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3030 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3031 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3032 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3034 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3035 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3036 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3037 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3038 0 /* flags are only used for -march switch. */ },
3039 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3040 PTA_64BIT /* flags are only used for -march switch. */ },
3043 int const pta_size = ARRAY_SIZE (processor_alias_table);
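/* Illustrative note, not additional GCC logic: each processor_alias_table
   entry switches on its implied ISA options through the chain of PTA_*
   tests below, e.g. for the MMX bit:

       if (processor_alias_table[i].flags & PTA_MMX
           && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
         ix86_isa_flags |= OPTION_MASK_ISA_MMX;

   Options the user set explicitly (ix86_isa_flags_explicit) are never
   overridden by the implication.  */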
3045 /* Set up prefix/suffix so the error messages refer to either the command
3046 line argument or the attribute(target). */
3055 prefix = "option(\"";
3060 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3061 SUBTARGET_OVERRIDE_OPTIONS;
3064 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3065 SUBSUBTARGET_OVERRIDE_OPTIONS;
3069 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3071 /* -fPIC is the default for x86_64. */
3072 if (TARGET_MACHO && TARGET_64BIT)
3075 /* Need to check -mtune=generic first. */
3076 if (ix86_tune_string)
3078 if (!strcmp (ix86_tune_string, "generic")
3079 || !strcmp (ix86_tune_string, "i686")
3080 /* As special support for cross compilers we read -mtune=native
3081 as -mtune=generic. With native compilers we won't see the
3082 -mtune=native, as it was changed by the driver. */
3083 || !strcmp (ix86_tune_string, "native"))
3086 ix86_tune_string = "generic64";
3088 ix86_tune_string = "generic32";
3090 /* If this call is for setting the option attribute, allow the
3091 generic32/generic64 that was previously set. */
3092 else if (!main_args_p
3093 && (!strcmp (ix86_tune_string, "generic32")
3094 || !strcmp (ix86_tune_string, "generic64")))
3096 else if (!strncmp (ix86_tune_string, "generic", 7))
3097 error ("bad value (%s) for %stune=%s %s",
3098 ix86_tune_string, prefix, suffix, sw);
3099 else if (!strcmp (ix86_tune_string, "x86-64"))
3100 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3101 "%stune=k8%s or %stune=generic%s instead as appropriate",
3102 prefix, suffix, prefix, suffix, prefix, suffix);
3106 if (ix86_arch_string)
3107 ix86_tune_string = ix86_arch_string;
3108 if (!ix86_tune_string)
3110 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3111 ix86_tune_defaulted = 1;
3114 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3115 need to use a sensible tune option. */
3116 if (!strcmp (ix86_tune_string, "generic")
3117 || !strcmp (ix86_tune_string, "x86-64")
3118 || !strcmp (ix86_tune_string, "i686"))
3121 ix86_tune_string = "generic64";
3123 ix86_tune_string = "generic32";
3127 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3129 /* rep; movq isn't available in 32-bit code. */
3130 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3131 ix86_stringop_alg = no_stringop;
3134 if (!ix86_arch_string)
3135 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3137 ix86_arch_specified = 1;
3139 if (!global_options_set.x_ix86_abi)
3140 ix86_abi = DEFAULT_ABI;
3142 if (global_options_set.x_ix86_cmodel)
3144 switch (ix86_cmodel)
3149 ix86_cmodel = CM_SMALL_PIC;
3151 error ("code model %qs not supported in the %s bit mode",
3158 ix86_cmodel = CM_MEDIUM_PIC;
3160 error ("code model %qs not supported in the %s bit mode",
3162 else if (TARGET_X32)
3163 error ("code model %qs not supported in x32 mode",
3170 ix86_cmodel = CM_LARGE_PIC;
3172 error ("code model %qs not supported in the %s bit mode",
3174 else if (TARGET_X32)
3175 error ("code model %qs not supported in x32 mode",
3181 error ("code model %s does not support PIC mode", "32");
3183 error ("code model %qs not supported in the %s bit mode",
3190 error ("code model %s does not support PIC mode", "kernel");
3191 ix86_cmodel = CM_32;
3194 error ("code model %qs not supported in the %s bit mode",
3204 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3205 use of rip-relative addressing. This eliminates fixups that
3206 would otherwise be needed if this object is to be placed in a
3207 DLL, and is essentially just as efficient as direct addressing. */
3208 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3209 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3210 else if (TARGET_64BIT)
3211 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3213 ix86_cmodel = CM_32;
3215 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3217 error ("-masm=intel not supported in this configuration");
3218 ix86_asm_dialect = ASM_ATT;
3220 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3221 sorry ("%i-bit mode not compiled in",
3222 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3224 for (i = 0; i < pta_size; i++)
3225 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3227 ix86_schedule = processor_alias_table[i].schedule;
3228 ix86_arch = processor_alias_table[i].processor;
3229 /* Default cpu tuning to the architecture. */
3230 ix86_tune = ix86_arch;
3232 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3233 error ("CPU you selected does not support x86-64 "
3236 if (processor_alias_table[i].flags & PTA_MMX
3237 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3238 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3239 if (processor_alias_table[i].flags & PTA_3DNOW
3240 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3241 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3242 if (processor_alias_table[i].flags & PTA_3DNOW_A
3243 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3244 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3245 if (processor_alias_table[i].flags & PTA_SSE
3246 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3247 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3248 if (processor_alias_table[i].flags & PTA_SSE2
3249 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3250 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3251 if (processor_alias_table[i].flags & PTA_SSE3
3252 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3253 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3254 if (processor_alias_table[i].flags & PTA_SSSE3
3255 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3256 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3257 if (processor_alias_table[i].flags & PTA_SSE4_1
3258 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3259 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3260 if (processor_alias_table[i].flags & PTA_SSE4_2
3261 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3262 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3263 if (processor_alias_table[i].flags & PTA_AVX
3264 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3265 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3266 if (processor_alias_table[i].flags & PTA_FMA
3267 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3268 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3269 if (processor_alias_table[i].flags & PTA_SSE4A
3270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3271 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3272 if (processor_alias_table[i].flags & PTA_FMA4
3273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3274 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3275 if (processor_alias_table[i].flags & PTA_XOP
3276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3277 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3278 if (processor_alias_table[i].flags & PTA_LWP
3279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3280 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3281 if (processor_alias_table[i].flags & PTA_ABM
3282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3283 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3284 if (processor_alias_table[i].flags & PTA_BMI
3285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3286 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3287 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3289 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3290 if (processor_alias_table[i].flags & PTA_TBM
3291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3292 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3293 if (processor_alias_table[i].flags & PTA_CX16
3294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3295 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3296 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3298 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3299 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3301 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3302 if (processor_alias_table[i].flags & PTA_MOVBE
3303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3304 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3305 if (processor_alias_table[i].flags & PTA_AES
3306 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3307 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3308 if (processor_alias_table[i].flags & PTA_PCLMUL
3309 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3310 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3311 if (processor_alias_table[i].flags & PTA_FSGSBASE
3312 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3313 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3314 if (processor_alias_table[i].flags & PTA_RDRND
3315 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3316 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3317 if (processor_alias_table[i].flags & PTA_F16C
3318 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3319 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3320 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3321 x86_prefetch_sse = true;
3326 if (!strcmp (ix86_arch_string, "generic"))
3327 error ("generic CPU can be used only for %stune=%s %s",
3328 prefix, suffix, sw);
3329 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3330 error ("bad value (%s) for %sarch=%s %s",
3331 ix86_arch_string, prefix, suffix, sw);
3333 ix86_arch_mask = 1u << ix86_arch;
3334 for (i = 0; i < X86_ARCH_LAST; ++i)
3335 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3337 for (i = 0; i < pta_size; i++)
3338 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3340 ix86_schedule = processor_alias_table[i].schedule;
3341 ix86_tune = processor_alias_table[i].processor;
3344 if (!(processor_alias_table[i].flags & PTA_64BIT))
3346 if (ix86_tune_defaulted)
3348 ix86_tune_string = "x86-64";
3349 for (i = 0; i < pta_size; i++)
3350 if (! strcmp (ix86_tune_string,
3351 processor_alias_table[i].name))
3353 ix86_schedule = processor_alias_table[i].schedule;
3354 ix86_tune = processor_alias_table[i].processor;
3357 error ("CPU you selected does not support x86-64 "
3363 /* Adjust tuning when compiling for 32-bit ABI. */
3366 case PROCESSOR_GENERIC64:
3367 ix86_tune = PROCESSOR_GENERIC32;
3368 ix86_schedule = CPU_PENTIUMPRO;
3371 case PROCESSOR_CORE2_64:
3372 ix86_tune = PROCESSOR_CORE2_32;
3375 case PROCESSOR_COREI7_64:
3376 ix86_tune = PROCESSOR_COREI7_32;
3383 /* Intel CPUs have always interpreted SSE prefetch instructions as
3384 NOPs, so we can enable SSE prefetch instructions even when -mtune
3385 (rather than -march) points us to a processor that has them.
3386 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3387 higher processors. */
3389 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3390 x86_prefetch_sse = true;
3394 if (ix86_tune_specified && i == pta_size)
3395 error ("bad value (%s) for %stune=%s %s",
3396 ix86_tune_string, prefix, suffix, sw);
3398 ix86_tune_mask = 1u << ix86_tune;
3399 for (i = 0; i < X86_TUNE_LAST; ++i)
3400 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3402 #ifndef USE_IX86_FRAME_POINTER
3403 #define USE_IX86_FRAME_POINTER 0
3406 #ifndef USE_X86_64_FRAME_POINTER
3407 #define USE_X86_64_FRAME_POINTER 0
3410 /* Set the default values for switches whose default depends on TARGET_64BIT
3411 in case they weren't overwritten by command line options. */
3414 if (optimize > 1 && !global_options_set.x_flag_zee)
3416 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3417 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3418 if (flag_asynchronous_unwind_tables == 2)
3419 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3420 if (flag_pcc_struct_return == 2)
3421 flag_pcc_struct_return = 0;
3425 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3426 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3427 if (flag_asynchronous_unwind_tables == 2)
3428 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3429 if (flag_pcc_struct_return == 2)
3430 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3434 ix86_cost = &ix86_size_cost;
3436 ix86_cost = processor_target_table[ix86_tune].cost;
3438 /* Arrange to set up i386_stack_locals for all functions. */
3439 init_machine_status = ix86_init_machine_status;
3441 /* Validate -mregparm= value. */
3442 if (global_options_set.x_ix86_regparm)
3445 warning (0, "-mregparm is ignored in 64-bit mode");
3446 if (ix86_regparm > REGPARM_MAX)
3448 error ("-mregparm=%d is not between 0 and %d",
3449 ix86_regparm, REGPARM_MAX);
3454 ix86_regparm = REGPARM_MAX;
3456 /* Default align_* from the processor table. */
3457 if (align_loops == 0)
3459 align_loops = processor_target_table[ix86_tune].align_loop;
3460 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3462 if (align_jumps == 0)
3464 align_jumps = processor_target_table[ix86_tune].align_jump;
3465 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3467 if (align_functions == 0)
3469 align_functions = processor_target_table[ix86_tune].align_func;
3472 /* Provide default for -mbranch-cost= value. */
3473 if (!global_options_set.x_ix86_branch_cost)
3474 ix86_branch_cost = ix86_cost->branch_cost;
3478 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3480 /* Enable by default the SSE and MMX builtins. Do allow the user to
3481 explicitly disable any of these. In particular, disabling SSE and
3482 MMX for kernel code is extremely useful. */
3483 if (!ix86_arch_specified)
3485 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3486 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3489 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3493 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3495 if (!ix86_arch_specified)
3497 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3499 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3500 when the programmer takes care to keep the stack from being destroyed. */
3501 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3502 target_flags |= MASK_NO_RED_ZONE;
3505 /* Keep nonleaf frame pointers. */
3506 if (flag_omit_frame_pointer)
3507 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3508 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3509 flag_omit_frame_pointer = 1;
3511 /* If we're doing fast math, we don't care about comparison order
3512 wrt NaNs. This lets us use a shorter comparison sequence. */
3513 if (flag_finite_math_only)
3514 target_flags &= ~MASK_IEEE_FP;
3516 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3517 since the insns won't need emulation. */
3518 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3519 target_flags &= ~MASK_NO_FANCY_MATH_387;
3521 /* Likewise, if the target doesn't have a 387, or we've specified
3522 software floating point, don't use 387 inline intrinsics. */
3524 target_flags |= MASK_NO_FANCY_MATH_387;
3526 /* Turn on MMX builtins for -msse. */
3529 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3530 x86_prefetch_sse = true;
3533 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3534 if (TARGET_SSE4_2 || TARGET_ABM)
3535 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3537 /* Turn on lzcnt instruction for -mabm. */
3539 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3541 /* Validate -mpreferred-stack-boundary= value or default it to
3542 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3543 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3544 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3546 int min = (TARGET_64BIT ? 4 : 2);
3547 int max = (TARGET_SEH ? 4 : 12);
3549 if (ix86_preferred_stack_boundary_arg < min
3550 || ix86_preferred_stack_boundary_arg > max)
3553 error ("-mpreferred-stack-boundary is not supported "
3556 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3557 ix86_preferred_stack_boundary_arg, min, max);
3560 ix86_preferred_stack_boundary
3561 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
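/* Worked example (illustrative arithmetic, not original source text):
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
   = 16 * 8 = 128 bits, i.e. 16-byte stack alignment; the 32-bit
   minimum of 2 corresponds to the traditional 4-byte alignment.  */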
3564 /* Set the default value for -mstackrealign. */
3565 if (ix86_force_align_arg_pointer == -1)
3566 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3568 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3570 /* Validate -mincoming-stack-boundary= value or default it to
3571 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3572 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3573 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3575 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3576 || ix86_incoming_stack_boundary_arg > 12)
3577 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3578 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3581 ix86_user_incoming_stack_boundary
3582 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3583 ix86_incoming_stack_boundary
3584 = ix86_user_incoming_stack_boundary;
3588 /* Accept -msseregparm only if at least SSE support is enabled. */
3589 if (TARGET_SSEREGPARM
3591 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3593 if (global_options_set.x_ix86_fpmath)
3595 if (ix86_fpmath & FPMATH_SSE)
3599 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3600 ix86_fpmath = FPMATH_387;
3602 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3604 warning (0, "387 instruction set disabled, using SSE arithmetics");
3605 ix86_fpmath = FPMATH_SSE;
3610 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3612 /* If the i387 is disabled, then do not return values in it. */
3614 target_flags &= ~MASK_FLOAT_RETURNS;
3616 /* Use an external vectorized library for vectorizing intrinsics. */
3617 if (global_options_set.x_ix86_veclibabi_type)
3618 switch (ix86_veclibabi_type)
3620 case ix86_veclibabi_type_svml:
3621 ix86_veclib_handler = ix86_veclibabi_svml;
3624 case ix86_veclibabi_type_acml:
3625 ix86_veclib_handler = ix86_veclibabi_acml;
3632 if ((!USE_IX86_FRAME_POINTER
3633 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3634 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3636 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3638 /* ??? Unwind info is not correct around the CFG unless either a frame
3639 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3640 unwind info generation to be aware of the CFG and propagating states
3642 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3643 || flag_exceptions || flag_non_call_exceptions)
3644 && flag_omit_frame_pointer
3645 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3647 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3648 warning (0, "unwind tables currently require either a frame pointer "
3649 "or %saccumulate-outgoing-args%s for correctness",
3651 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3654 /* If stack probes are required, the space used for large function
3655 arguments on the stack must also be probed, so enable
3656 -maccumulate-outgoing-args so this happens in the prologue. */
3657 if (TARGET_STACK_PROBE
3658 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3660 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3661 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3662 "for correctness", prefix, suffix);
3663 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3666 /* For sane SSE instruction set generation we need the fcomi instruction.
3667 It is safe to enable all CMOV instructions.  Also, the RDRAND intrinsic
3668 expands to a sequence that includes a conditional move. */
3669 if (TARGET_SSE || TARGET_RDRND)
3672 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3675 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3676 p = strchr (internal_label_prefix, 'X');
3677 internal_label_prefix_len = p - internal_label_prefix;
3681 /* When a scheduling description is not available, disable the scheduler pass
3682 so it won't slow down the compilation and make x87 code slower. */
3683 if (!TARGET_SCHEDULE)
3684 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3686 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3687 ix86_cost->simultaneous_prefetches,
3688 global_options.x_param_values,
3689 global_options_set.x_param_values);
3690 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3691 global_options.x_param_values,
3692 global_options_set.x_param_values);
3693 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3694 global_options.x_param_values,
3695 global_options_set.x_param_values);
3696 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3697 global_options.x_param_values,
3698 global_options_set.x_param_values);
3700 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3701 if (flag_prefetch_loop_arrays < 0
3704 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3705 flag_prefetch_loop_arrays = 1;
3707 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3708 can be optimized to ap = __builtin_next_arg (0). */
3709 if (!TARGET_64BIT && !flag_split_stack)
3710 targetm.expand_builtin_va_start = NULL;
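/* Illustrative sketch (an assumption about the shape of the user code,
   not GCC's exact lowering): on a 32-bit target where va_list is a
   plain 'char *',

     void f (int last, ...)
     {
       va_list ap;
       va_start (ap, last);   // may become ap = __builtin_next_arg (0)
       ...
     }

   no real va_start expansion is needed, since the pointer can simply
   be aimed just past the last named argument.  */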
3714 ix86_gen_leave = gen_leave_rex64;
3715 ix86_gen_add3 = gen_adddi3;
3716 ix86_gen_sub3 = gen_subdi3;
3717 ix86_gen_sub3_carry = gen_subdi3_carry;
3718 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3719 ix86_gen_monitor = gen_sse3_monitor64;
3720 ix86_gen_andsp = gen_anddi3;
3721 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3722 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3723 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3727 ix86_gen_leave = gen_leave;
3728 ix86_gen_add3 = gen_addsi3;
3729 ix86_gen_sub3 = gen_subsi3;
3730 ix86_gen_sub3_carry = gen_subsi3_carry;
3731 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3732 ix86_gen_monitor = gen_sse3_monitor;
3733 ix86_gen_andsp = gen_andsi3;
3734 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3735 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3736 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3740 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3742 target_flags |= MASK_CLD & ~target_flags_explicit;
3745 if (!TARGET_64BIT && flag_pic)
3747 if (flag_fentry > 0)
3748 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3752 else if (TARGET_SEH)
3754 if (flag_fentry == 0)
3755 sorry ("-mno-fentry isn%'t compatible with SEH");
3758 else if (flag_fentry < 0)
3760 #if defined(PROFILE_BEFORE_PROLOGUE)
3769 /* When not optimizing for size, enable the vzeroupper optimization for
3770 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3771 AVX unaligned loads/stores. */
3774 if (flag_expensive_optimizations
3775 && !(target_flags_explicit & MASK_VZEROUPPER))
3776 target_flags |= MASK_VZEROUPPER;
3777 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3778 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3779 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3780 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3781 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3782 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3783 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3784 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3785 target_flags |= MASK_PREFER_AVX128;
3790 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3791 target_flags &= ~MASK_VZEROUPPER;
3794 /* Save the initial options in case the user does function specific options.  */
3797 target_option_default_node = target_option_current_node
3798 = build_target_option_node ();
3801 /* Return TRUE if VAL is passed in a register in a 256-bit AVX mode. */
3804 function_pass_avx256_p (const_rtx val)
3809 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3812 if (GET_CODE (val) == PARALLEL)
3817 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3819 r = XVECEXP (val, 0, i);
3820 if (GET_CODE (r) == EXPR_LIST
3822 && REG_P (XEXP (r, 0))
3823 && (GET_MODE (XEXP (r, 0)) == OImode
3824 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3832 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3835 ix86_option_override (void)
3837 ix86_option_override_internal (true);
3840 /* Update register usage after having seen the compiler flags. */
3843 ix86_conditional_register_usage (void)
3848 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3850 if (fixed_regs[i] > 1)
3851 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3852 if (call_used_regs[i] > 1)
3853 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3856 /* The PIC register, if it exists, is fixed. */
3857 j = PIC_OFFSET_TABLE_REGNUM;
3858 if (j != INVALID_REGNUM)
3859 fixed_regs[j] = call_used_regs[j] = 1;
3861 /* The 64-bit MS_ABI changes the set of call-used registers. */
3862 if (TARGET_64BIT_MS_ABI)
3864 call_used_regs[SI_REG] = 0;
3865 call_used_regs[DI_REG] = 0;
3866 call_used_regs[XMM6_REG] = 0;
3867 call_used_regs[XMM7_REG] = 0;
3868 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3869 call_used_regs[i] = 0;
3872 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3873 other call-clobbered regs for 64-bit. */
3876 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3878 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3879 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3880 && call_used_regs[i])
3881 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3884 /* If MMX is disabled, squash the registers. */
3886 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3887 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3888 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3890 /* If SSE is disabled, squash the registers. */
3892 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3893 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3894 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3896 /* If the FPU is disabled, squash the registers. */
3897 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3898 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3899 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3900 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3902 /* If 32-bit, squash the 64-bit registers. */
3905 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3907 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3913 /* Save the current options */
3916 ix86_function_specific_save (struct cl_target_option *ptr)
3918 ptr->arch = ix86_arch;
3919 ptr->schedule = ix86_schedule;
3920 ptr->tune = ix86_tune;
3921 ptr->branch_cost = ix86_branch_cost;
3922 ptr->tune_defaulted = ix86_tune_defaulted;
3923 ptr->arch_specified = ix86_arch_specified;
3924 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3925 ptr->ix86_target_flags_explicit = target_flags_explicit;
3927 /* The fields are char but the variables are not; make sure the
3928 values fit in the fields. */
3929 gcc_assert (ptr->arch == ix86_arch);
3930 gcc_assert (ptr->schedule == ix86_schedule);
3931 gcc_assert (ptr->tune == ix86_tune);
3932 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3935 /* Restore the current options */
3938 ix86_function_specific_restore (struct cl_target_option *ptr)
3940 enum processor_type old_tune = ix86_tune;
3941 enum processor_type old_arch = ix86_arch;
3942 unsigned int ix86_arch_mask, ix86_tune_mask;
3945 ix86_arch = (enum processor_type) ptr->arch;
3946 ix86_schedule = (enum attr_cpu) ptr->schedule;
3947 ix86_tune = (enum processor_type) ptr->tune;
3948 ix86_branch_cost = ptr->branch_cost;
3949 ix86_tune_defaulted = ptr->tune_defaulted;
3950 ix86_arch_specified = ptr->arch_specified;
3951 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3952 target_flags_explicit = ptr->ix86_target_flags_explicit;
3954 /* Recreate the arch feature tests if the arch changed */
3955 if (old_arch != ix86_arch)
3957 ix86_arch_mask = 1u << ix86_arch;
3958 for (i = 0; i < X86_ARCH_LAST; ++i)
3959 ix86_arch_features[i]
3960 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3963 /* Recreate the tune optimization tests */
3964 if (old_tune != ix86_tune)
3966 ix86_tune_mask = 1u << ix86_tune;
3967 for (i = 0; i < X86_TUNE_LAST; ++i)
3968 ix86_tune_features[i]
3969 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3973 /* Print the current options */
3976 ix86_function_specific_print (FILE *file, int indent,
3977 struct cl_target_option *ptr)
3980 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3981 NULL, NULL, ptr->x_ix86_fpmath, false);
3983 fprintf (file, "%*sarch = %d (%s)\n",
3986 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3987 ? cpu_names[ptr->arch]
3990 fprintf (file, "%*stune = %d (%s)\n",
3993 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3994 ? cpu_names[ptr->tune]
3997 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4001 fprintf (file, "%*s%s\n", indent, "", target_string);
4002 free (target_string);
4007 /* Inner function to process the attribute((target(...))): take an argument
4008 and set the current options from it.  If we have a list, recursively go over it.  */
4012 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4013 struct gcc_options *enum_opts_set)
4018 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4019 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4020 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4021 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4022 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4038 enum ix86_opt_type type;
4043 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4044 IX86_ATTR_ISA ("abm", OPT_mabm),
4045 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4046 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4047 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4048 IX86_ATTR_ISA ("aes", OPT_maes),
4049 IX86_ATTR_ISA ("avx", OPT_mavx),
4050 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4051 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4052 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4053 IX86_ATTR_ISA ("sse", OPT_msse),
4054 IX86_ATTR_ISA ("sse2", OPT_msse2),
4055 IX86_ATTR_ISA ("sse3", OPT_msse3),
4056 IX86_ATTR_ISA ("sse4", OPT_msse4),
4057 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4058 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4059 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4060 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4061 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4062 IX86_ATTR_ISA ("xop", OPT_mxop),
4063 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4064 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4065 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4066 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4069 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4071 /* string options */
4072 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4073 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4076 IX86_ATTR_YES ("cld",
4080 IX86_ATTR_NO ("fancy-math-387",
4081 OPT_mfancy_math_387,
4082 MASK_NO_FANCY_MATH_387),
4084 IX86_ATTR_YES ("ieee-fp",
4088 IX86_ATTR_YES ("inline-all-stringops",
4089 OPT_minline_all_stringops,
4090 MASK_INLINE_ALL_STRINGOPS),
4092 IX86_ATTR_YES ("inline-stringops-dynamically",
4093 OPT_minline_stringops_dynamically,
4094 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4096 IX86_ATTR_NO ("align-stringops",
4097 OPT_mno_align_stringops,
4098 MASK_NO_ALIGN_STRINGOPS),
4100 IX86_ATTR_YES ("recip",
4106 /* If this is a list, recurse to get the options. */
4107 if (TREE_CODE (args) == TREE_LIST)
4111 for (; args; args = TREE_CHAIN (args))
4112 if (TREE_VALUE (args)
4113 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4114 p_strings, enum_opts_set))
4120 else if (TREE_CODE (args) != STRING_CST)
4123 /* Handle multiple arguments separated by commas. */
4124 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4126 while (next_optstr && *next_optstr != '\0')
4128 char *p = next_optstr;
4130 char *comma = strchr (next_optstr, ',');
4131 const char *opt_string;
4132 size_t len, opt_len;
4137 enum ix86_opt_type type = ix86_opt_unknown;
4143 len = comma - next_optstr;
4144 next_optstr = comma + 1;
4152 /* Recognize no-xxx. */
4153 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4162 /* Find the option. */
4165 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4167 type = attrs[i].type;
4168 opt_len = attrs[i].len;
4169 if (ch == attrs[i].string[0]
4170 && ((type != ix86_opt_str && type != ix86_opt_enum)
4173 && memcmp (p, attrs[i].string, opt_len) == 0)
4176 mask = attrs[i].mask;
4177 opt_string = attrs[i].string;
4182 /* Process the option. */
4185 error ("attribute(target(\"%s\")) is unknown", orig_p);
4189 else if (type == ix86_opt_isa)
4191 struct cl_decoded_option decoded;
4193 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4194 ix86_handle_option (&global_options, &global_options_set,
4195 &decoded, input_location);
4198 else if (type == ix86_opt_yes || type == ix86_opt_no)
4200 if (type == ix86_opt_no)
4201 opt_set_p = !opt_set_p;
4204 target_flags |= mask;
4206 target_flags &= ~mask;
4209 else if (type == ix86_opt_str)
4213 error ("option(\"%s\") was already specified", opt_string);
4217 p_strings[opt] = xstrdup (p + opt_len);
4220 else if (type == ix86_opt_enum)
4225 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4227 set_option (&global_options, enum_opts_set, opt, value,
4228 p + opt_len, DK_UNSPECIFIED, input_location,
4232 error ("attribute(target(\"%s\")) is unknown", orig_p);
4244 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4247 ix86_valid_target_attribute_tree (tree args)
4249 const char *orig_arch_string = ix86_arch_string;
4250 const char *orig_tune_string = ix86_tune_string;
4251 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4252 int orig_tune_defaulted = ix86_tune_defaulted;
4253 int orig_arch_specified = ix86_arch_specified;
4254 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4257 struct cl_target_option *def
4258 = TREE_TARGET_OPTION (target_option_default_node);
4259 struct gcc_options enum_opts_set;
4261 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4263 /* Process each of the options on the chain. */
4264 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4268 /* If the changed options are different from the default, rerun
4269 ix86_option_override_internal, and then save the options away.
4270 The string options are attribute options, and will be undone
4271 when we copy the save structure. */
4272 if (ix86_isa_flags != def->x_ix86_isa_flags
4273 || target_flags != def->x_target_flags
4274 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4275 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4276 || enum_opts_set.x_ix86_fpmath)
4278 /* If we are using the default tune= or arch=, undo the string assigned,
4279 and use the default. */
4280 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4281 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4282 else if (!orig_arch_specified)
4283 ix86_arch_string = NULL;
4285 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4286 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4287 else if (orig_tune_defaulted)
4288 ix86_tune_string = NULL;
4290 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4291 if (enum_opts_set.x_ix86_fpmath)
4292 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4293 else if (!TARGET_64BIT && TARGET_SSE)
4295 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4296 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4299 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4300 ix86_option_override_internal (false);
4302 /* Add any builtin functions with the new isa if any. */
4303 ix86_add_new_builtins (ix86_isa_flags);
4305 /* Save the current options unless we are validating options for #pragma.  */
4307 t = build_target_option_node ();
4309 ix86_arch_string = orig_arch_string;
4310 ix86_tune_string = orig_tune_string;
4311 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4313 /* Free up memory allocated to hold the strings */
4314 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4315 free (option_strings[i]);
4321 /* Hook to validate attribute((target("string"))). */
4324 ix86_valid_target_attribute_p (tree fndecl,
4325 tree ARG_UNUSED (name),
4327 int ARG_UNUSED (flags))
4329 struct cl_target_option cur_target;
4331 tree old_optimize = build_optimization_node ();
4332 tree new_target, new_optimize;
4333 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4335 /* If the function changed the optimization levels as well as setting target
4336 options, start with the optimizations specified. */
4337 if (func_optimize && func_optimize != old_optimize)
4338 cl_optimization_restore (&global_options,
4339 TREE_OPTIMIZATION (func_optimize));
4341 /* The target attributes may also change some optimization flags, so update
4342 the optimization options if necessary. */
4343 cl_target_option_save (&cur_target, &global_options);
4344 new_target = ix86_valid_target_attribute_tree (args);
4345 new_optimize = build_optimization_node ();
4352 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4354 if (old_optimize != new_optimize)
4355 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4358 cl_target_option_restore (&global_options, &cur_target);
4360 if (old_optimize != new_optimize)
4361 cl_optimization_restore (&global_options,
4362 TREE_OPTIMIZATION (old_optimize));
4368 /* Hook to determine if one function can safely inline another. */
4371 ix86_can_inline_p (tree caller, tree callee)
4374 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4375 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4377 /* If callee has no option attributes, then it is ok to inline. */
4381 /* If the caller has no option attributes but the callee does, then it is not OK to inline.  */
4383 else if (!caller_tree)
4388 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4389 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4391 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4392 can inline an SSE2 function, but an SSE2 function can't inline an SSE4 one.
4394 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4395 != callee_opts->x_ix86_isa_flags)
4398 /* See if we have the same non-isa options. */
4399 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4402 /* See if arch, tune, etc. are the same. */
4403 else if (caller_opts->arch != callee_opts->arch)
4406 else if (caller_opts->tune != callee_opts->tune)
4409 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4412 else if (caller_opts->branch_cost != callee_opts->branch_cost)
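/* A minimal sketch (illustrative helper, not part of GCC) of the ISA
   subset test performed above: the caller can inline the callee only
   if every ISA bit the callee requires is also set for the caller.
   Compiled out with #if 0 since it is expository only.  */
#if 0
static bool
isa_subset_p (int caller_isa, int callee_isa)
{
  /* E.g. an SSE4 caller may inline an SSE2 callee, but not vice versa.  */
  return (caller_isa & callee_isa) == callee_isa;
}
#endif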
4423 /* Remember the last target of ix86_set_current_function. */
4424 static GTY(()) tree ix86_previous_fndecl;
4426 /* Establish appropriate back-end context for processing the function
4427 FNDECL. The argument might be NULL to indicate processing at top
4428 level, outside of any function scope. */
4430 ix86_set_current_function (tree fndecl)
4432 /* Only change the context if the function changes. This hook is called
4433 several times in the course of compiling a function, and we don't want to
4434 slow things down too much or call target_reinit when it isn't safe. */
4435 if (fndecl && fndecl != ix86_previous_fndecl)
4437 tree old_tree = (ix86_previous_fndecl
4438 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4441 tree new_tree = (fndecl
4442 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4445 ix86_previous_fndecl = fndecl;
4446 if (old_tree == new_tree)
4451 cl_target_option_restore (&global_options,
4452 TREE_TARGET_OPTION (new_tree));
4458 struct cl_target_option *def
4459 = TREE_TARGET_OPTION (target_option_current_node);
4461 cl_target_option_restore (&global_options, def);
4468 /* Return true if this goes in large data/bss. */
4471 ix86_in_large_data_p (tree exp)
4473 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4476 /* Functions are never large data. */
4477 if (TREE_CODE (exp) == FUNCTION_DECL)
4480 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4482 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4483 if (strcmp (section, ".ldata") == 0
4484 || strcmp (section, ".lbss") == 0)
4490 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4492 /* If this is an incomplete type with size 0, then we can't put it
4493 in data because it might be too big when completed. */
4494 if (!size || size > ix86_section_threshold)
4501 /* Switch to the appropriate section for output of DECL.
4502 DECL is either a `VAR_DECL' node or a constant of some sort.
4503 RELOC indicates whether forming the initial value of DECL requires
4504 link-time relocations. */
4506 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4510 x86_64_elf_select_section (tree decl, int reloc,
4511 unsigned HOST_WIDE_INT align)
4513 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4514 && ix86_in_large_data_p (decl))
4516 const char *sname = NULL;
4517 unsigned int flags = SECTION_WRITE;
4518 switch (categorize_decl_for_section (decl, reloc))
4523 case SECCAT_DATA_REL:
4524 sname = ".ldata.rel";
4526 case SECCAT_DATA_REL_LOCAL:
4527 sname = ".ldata.rel.local";
4529 case SECCAT_DATA_REL_RO:
4530 sname = ".ldata.rel.ro";
4532 case SECCAT_DATA_REL_RO_LOCAL:
4533 sname = ".ldata.rel.ro.local";
4537 flags |= SECTION_BSS;
4540 case SECCAT_RODATA_MERGE_STR:
4541 case SECCAT_RODATA_MERGE_STR_INIT:
4542 case SECCAT_RODATA_MERGE_CONST:
4546 case SECCAT_SRODATA:
4553 /* We don't split these for the medium model.  Place them into
4554 default sections and hope for the best. */
4559 /* We might get called with string constants, but get_named_section
4560 doesn't like them as they are not DECLs. Also, we need to set
4561 flags in that case. */
4563 return get_section (sname, flags, NULL);
4564 return get_named_section (decl, sname, reloc);
4567 return default_elf_select_section (decl, reloc, align);
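/* For example (illustrative): under -mcmodel=medium, a writable global
   larger than ix86_section_threshold (-mlarge-data-threshold) that is
   categorized as SECCAT_DATA_REL is placed in ".ldata.rel" instead of
   the default ".data.rel" section.  */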
4570 /* Build up a unique section name, expressed as a
4571 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4572 RELOC indicates whether the initial value of EXP requires
4573 link-time relocations. */
4575 static void ATTRIBUTE_UNUSED
4576 x86_64_elf_unique_section (tree decl, int reloc)
4578 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4579 && ix86_in_large_data_p (decl))
4581 const char *prefix = NULL;
4582 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4583 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4585 switch (categorize_decl_for_section (decl, reloc))
4588 case SECCAT_DATA_REL:
4589 case SECCAT_DATA_REL_LOCAL:
4590 case SECCAT_DATA_REL_RO:
4591 case SECCAT_DATA_REL_RO_LOCAL:
4592 prefix = one_only ? ".ld" : ".ldata";
4595 prefix = one_only ? ".lb" : ".lbss";
4598 case SECCAT_RODATA_MERGE_STR:
4599 case SECCAT_RODATA_MERGE_STR_INIT:
4600 case SECCAT_RODATA_MERGE_CONST:
4601 prefix = one_only ? ".lr" : ".lrodata";
4603 case SECCAT_SRODATA:
4610 /* We don't split these for the medium model.  Place them into
4611 default sections and hope for the best. */
4616 const char *name, *linkonce;
4619 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4620 name = targetm.strip_name_encoding (name);
4622 /* If we're using one_only, then there needs to be a .gnu.linkonce
4623 prefix to the section name. */
4624 linkonce = one_only ? ".gnu.linkonce" : "";
4626 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4628 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4632 default_unique_section (decl, reloc);
4635 #ifdef COMMON_ASM_OP
4636 /* This says how to output assembler code to declare an
4637 uninitialized external linkage data object.
4639 For medium-model x86-64 we need to use the .largecomm directive for large objects. */
4642 x86_elf_aligned_common (FILE *file,
4643 const char *name, unsigned HOST_WIDE_INT size,
4646 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4647 && size > (unsigned int)ix86_section_threshold)
4648 fputs (".largecomm\t", file);
4650 fputs (COMMON_ASM_OP, file);
4651 assemble_name (file, name);
4652 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4653 size, align / BITS_PER_UNIT);
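/* Sketch of the expected output (hypothetical symbol names): a 1 MiB
   medium-model object is emitted as

       .largecomm	big_buf,1048576,32

   while a small object keeps the usual form, e.g.
   ".comm	small_buf,64,8".  */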
4657 /* Utility function for targets to use in implementing
4658 ASM_OUTPUT_ALIGNED_BSS. */
4661 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4662 const char *name, unsigned HOST_WIDE_INT size,
4665 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4666 && size > (unsigned int)ix86_section_threshold)
4667 switch_to_section (get_named_section (decl, ".lbss", 0));
4669 switch_to_section (bss_section);
4670 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4671 #ifdef ASM_DECLARE_OBJECT_NAME
4672 last_assemble_variable_decl = decl;
4673 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4675 /* Standard thing is just output label for the object. */
4676 ASM_OUTPUT_LABEL (file, name);
4677 #endif /* ASM_DECLARE_OBJECT_NAME */
4678 ASM_OUTPUT_SKIP (file, size ? size : 1);
4681 /* Decide whether we must probe the stack before any space allocation
4682 on this target. It's essentially TARGET_STACK_PROBE except when
4683 -fstack-check causes the stack to be already probed differently. */
4686 ix86_target_stack_probe (void)
4688 /* Do not probe the stack twice if static stack checking is enabled. */
4689 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4692 return TARGET_STACK_PROBE;
4695 /* Decide whether we can make a sibling call to a function. DECL is the
4696 declaration of the function being targeted by the call and EXP is the
4697 CALL_EXPR representing the call. */
4700 ix86_function_ok_for_sibcall (tree decl, tree exp)
4702 tree type, decl_or_type;
4705 /* If we are generating position-independent code, we cannot sibcall
4706 optimize any indirect call, or a direct call to a global function,
4707 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4711 && (!decl || !targetm.binds_local_p (decl)))
4714 /* If we need to align the outgoing stack, then sibcalling would
4715 unalign the stack, which may break the called function. */
4716 if (ix86_minimum_incoming_stack_boundary (true)
4717 < PREFERRED_STACK_BOUNDARY)
4722 decl_or_type = decl;
4723 type = TREE_TYPE (decl);
4727 /* We're looking at the CALL_EXPR, we need the type of the function. */
4728 type = CALL_EXPR_FN (exp); /* pointer expression */
4729 type = TREE_TYPE (type); /* pointer type */
4730 type = TREE_TYPE (type); /* function type */
4731 decl_or_type = type;
4734 /* Check that the return value locations are the same.  For example,
4735 if we are returning floats on the 80387 register stack, we cannot
4736 make a sibcall from a function that doesn't return a float to a
4737 function that does or, conversely, from a function that does return
4738 a float to a function that doesn't; the necessary stack adjustment
4739 would not be executed. This is also the place we notice
4740 differences in the return value ABI. Note that it is ok for one
4741 of the functions to have void return type as long as the return
4742 value of the other is passed in a register. */
4743 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4744 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4746 if (STACK_REG_P (a) || STACK_REG_P (b))
4748 if (!rtx_equal_p (a, b))
4751 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4753 /* Disable sibcall if we need to generate vzeroupper after
4755 if (TARGET_VZEROUPPER
4756 && cfun->machine->callee_return_avx256_p
4757 && !cfun->machine->caller_return_avx256_p)
4760 else if (!rtx_equal_p (a, b))
4765 /* The SYSV ABI has more call-clobbered registers;
4766 disallow sibcalls from MS to SYSV. */
4767 if (cfun->machine->call_abi == MS_ABI
4768 && ix86_function_type_abi (type) == SYSV_ABI)
4773 /* If this call is indirect, we'll need to be able to use a
4774 call-clobbered register for the address of the target function.
4775 Make sure that all such registers are not used for passing
4776 parameters. Note that DLLIMPORT functions are indirect. */
4778 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4780 if (ix86_function_regparm (type, NULL) >= 3)
4782 /* ??? Need to count the actual number of registers to be used,
4783 not the possible number of registers. Fix later. */
4789 /* Otherwise okay. That also includes certain types of indirect calls. */
4793 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4794 and "sseregparm" calling convention attributes;
4795 arguments as in struct attribute_spec.handler. */
4798 ix86_handle_cconv_attribute (tree *node, tree name,
4800 int flags ATTRIBUTE_UNUSED,
4803 if (TREE_CODE (*node) != FUNCTION_TYPE
4804 && TREE_CODE (*node) != METHOD_TYPE
4805 && TREE_CODE (*node) != FIELD_DECL
4806 && TREE_CODE (*node) != TYPE_DECL)
4808 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4810 *no_add_attrs = true;
4814 /* Can combine regparm with all attributes but fastcall and thiscall. */
4815 if (is_attribute_p ("regparm", name))
4819 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4821 error ("fastcall and regparm attributes are not compatible");
4824 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4826 error ("regparam and thiscall attributes are not compatible");
4829 cst = TREE_VALUE (args);
4830 if (TREE_CODE (cst) != INTEGER_CST)
4832 warning (OPT_Wattributes,
4833 "%qE attribute requires an integer constant argument",
4835 *no_add_attrs = true;
4837 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4839 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4841 *no_add_attrs = true;
4849 /* Do not warn when emulating the MS ABI. */
4850 if ((TREE_CODE (*node) != FUNCTION_TYPE
4851 && TREE_CODE (*node) != METHOD_TYPE)
4852 || ix86_function_type_abi (*node) != MS_ABI)
4853 warning (OPT_Wattributes, "%qE attribute ignored",
4855 *no_add_attrs = true;
4859 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4860 if (is_attribute_p ("fastcall", name))
4862 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4864 error ("fastcall and cdecl attributes are not compatible");
4866 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4868 error ("fastcall and stdcall attributes are not compatible");
4870 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4872 error ("fastcall and regparm attributes are not compatible");
4874 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4876 error ("fastcall and thiscall attributes are not compatible");
4880 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4882 else if (is_attribute_p ("stdcall", name))
4884 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4886 error ("stdcall and cdecl attributes are not compatible");
4888 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4890 error ("stdcall and fastcall attributes are not compatible");
4892 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4894 error ("stdcall and thiscall attributes are not compatible");
4898 /* Can combine cdecl with regparm and sseregparm. */
4899 else if (is_attribute_p ("cdecl", name))
4901 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4903 error ("stdcall and cdecl attributes are not compatible");
4905 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4907 error ("fastcall and cdecl attributes are not compatible");
4909 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4911 error ("cdecl and thiscall attributes are not compatible");
4914 else if (is_attribute_p ("thiscall", name))
4916 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4917 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4919 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4921 error ("stdcall and thiscall attributes are not compatible");
4923 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4925 error ("fastcall and thiscall attributes are not compatible");
4927 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4929 error ("cdecl and thiscall attributes are not compatible");
4933 /* Can combine sseregparm with all attributes. */
4938 /* This function determines the calling convention from TYPE. */
4941 ix86_get_callcvt (const_tree type)
4943 unsigned int ret = 0;
4948 return IX86_CALLCVT_CDECL;
4950 attrs = TYPE_ATTRIBUTES (type);
4951 if (attrs != NULL_TREE)
4953 if (lookup_attribute ("cdecl", attrs))
4954 ret |= IX86_CALLCVT_CDECL;
4955 else if (lookup_attribute ("stdcall", attrs))
4956 ret |= IX86_CALLCVT_STDCALL;
4957 else if (lookup_attribute ("fastcall", attrs))
4958 ret |= IX86_CALLCVT_FASTCALL;
4959 else if (lookup_attribute ("thiscall", attrs))
4960 ret |= IX86_CALLCVT_THISCALL;
4962 /* Regparm isn't allowed for thiscall and fastcall. */
4963 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4965 if (lookup_attribute ("regparm", attrs))
4966 ret |= IX86_CALLCVT_REGPARM;
4967 if (lookup_attribute ("sseregparm", attrs))
4968 ret |= IX86_CALLCVT_SSEREGPARM;
4971 if (IX86_BASE_CALLCVT(ret) != 0)
4975 is_stdarg = stdarg_p (type);
4976 if (TARGET_RTD && !is_stdarg)
4977 return IX86_CALLCVT_STDCALL | ret;
4981 || TREE_CODE (type) != METHOD_TYPE
4982 || ix86_function_type_abi (type) != MS_ABI)
4983 return IX86_CALLCVT_CDECL | ret;
4985 return IX86_CALLCVT_THISCALL;
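/* For example: in 32-bit code a prototype with no convention
   attributes normally yields IX86_CALLCVT_CDECL, while under -mrtd a
   prototyped non-stdarg function is treated as IX86_CALLCVT_STDCALL
   instead.  */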
4988 /* Return 0 if the attributes for two types are incompatible, 1 if they
4989 are compatible, and 2 if they are nearly compatible (which causes a
4990 warning to be generated). */
4993 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4995 unsigned int ccvt1, ccvt2;
4997 if (TREE_CODE (type1) != FUNCTION_TYPE
4998 && TREE_CODE (type1) != METHOD_TYPE)
5001 ccvt1 = ix86_get_callcvt (type1);
5002 ccvt2 = ix86_get_callcvt (type2);
5005 if (ix86_function_regparm (type1, NULL)
5006 != ix86_function_regparm (type2, NULL))
5012 /* Return the regparm value for a function with the indicated TYPE and DECL.
5013 DECL may be NULL when calling function indirectly
5014 or considering a libcall. */
5017 ix86_function_regparm (const_tree type, const_tree decl)
5024 return (ix86_function_type_abi (type) == SYSV_ABI
5025 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5026 ccvt = ix86_get_callcvt (type);
5027 regparm = ix86_regparm;
5029 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5031 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5034 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5038 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5040 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5043 /* Use register calling convention for local functions when possible. */
5045 && TREE_CODE (decl) == FUNCTION_DECL
5047 && !(profile_flag && !flag_fentry))
5049 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5050 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5051 if (i && i->local && i->can_change_signature)
5053 int local_regparm, globals = 0, regno;
5055 /* Make sure no regparm register is taken by a
5056 fixed register variable. */
5057 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5058 if (fixed_regs[local_regparm])
5061 /* We don't want to use regparm(3) for nested functions as
5062 these use a static chain pointer in the third argument. */
5063 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5066 /* In 32-bit mode save a register for the split stack. */
5067 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5070 /* Each fixed register usage increases register pressure,
5071 so fewer registers should be used for argument passing.
5072 This functionality can be overridden by an explicit regparm value. */
5074 for (regno = 0; regno <= DI_REG; regno++)
5075 if (fixed_regs[regno])
5079 = globals < local_regparm ? local_regparm - globals : 0;
5081 if (local_regparm > regparm)
5082 regparm = local_regparm;
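/* Usage sketch (illustrative): with

     __attribute__ ((regparm (3))) int f (int a, int b, int c);

   the three integer arguments are passed in registers (EAX, EDX and
   ECX on IA-32) rather than on the stack; the code above may pick the
   same convention automatically for local functions whose signature
   the compiler is free to change.  */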
5089 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5090 DFmode (2) arguments in SSE registers for a function with the
5091 indicated TYPE and DECL. DECL may be NULL when calling function
5092 indirectly or considering a libcall. Otherwise return 0. */
5095 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5097 gcc_assert (!TARGET_64BIT);
5099 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5100 by the sseregparm attribute. */
5101 if (TARGET_SSEREGPARM
5102 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5109 error ("calling %qD with attribute sseregparm without "
5110 "SSE/SSE2 enabled", decl);
5112 error ("calling %qT with attribute sseregparm without "
5113 "SSE/SSE2 enabled", type);
5121 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5122 (and DFmode for SSE2) arguments in SSE registers. */
5123 if (decl && TARGET_SSE_MATH && optimize
5124 && !(profile_flag && !flag_fentry))
5126 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5127 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5128 if (i && i->local && i->can_change_signature)
5129 return TARGET_SSE2 ? 2 : 1;
5135 /* Return true if EAX is live at the start of the function. Used by
5136 ix86_expand_prologue to determine if we need special help before
5137 calling allocate_stack_worker. */
5140 ix86_eax_live_at_start_p (void)
5142 /* Cheat. Don't bother working forward from ix86_function_regparm
5143 to the function type to whether an actual argument is located in
5144 eax. Instead just look at cfg info, which is still close enough
5145 to correct at this point. This gives false positives for broken
5146 functions that might use uninitialized data that happens to be
5147 allocated in eax, but who cares? */
5148 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5152 ix86_keep_aggregate_return_pointer (tree fntype)
5158 attr = lookup_attribute ("callee_pop_aggregate_return",
5159 TYPE_ATTRIBUTES (fntype));
5161 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5163 /* For the 32-bit MS-ABI the default is to keep the aggregate return pointer. */
5165 if (ix86_function_type_abi (fntype) == MS_ABI)
5168 return KEEP_AGGREGATE_RETURN_POINTER != 0;
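/* Usage sketch (illustrative; the semantics follow the code above):
   with

     __attribute__ ((callee_pop_aggregate_return (0)))
     struct big ret_big (void);

   the argument 0 makes this return true, so the callee does not pop
   the hidden aggregate-return pointer; a nonzero argument asks the
   callee to pop it (see ix86_return_pops_args below).  */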
5171 /* Value is the number of bytes of arguments automatically
5172 popped when returning from a subroutine call.
5173 FUNDECL is the declaration node of the function (as a tree),
5174 FUNTYPE is the data type of the function (as a tree),
5175 or for a library call it is an identifier node for the subroutine name.
5176 SIZE is the number of bytes of arguments passed on the stack.
5178 On the 80386, the RTD insn may be used to pop them if the number
5179 of args is fixed, but if the number is variable then the caller
5180 must pop them all. RTD can't be used for library calls now
5181 because the library is compiled with the Unix compiler.
5182 Use of RTD is a selectable option, since it is incompatible with
5183 standard Unix calling sequences. If the option is not selected,
5184 the caller must always pop the args.
5186 The attribute stdcall is equivalent to RTD on a per module basis. */
5189 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5193 /* None of the 64-bit ABIs pop arguments. */
5197 ccvt = ix86_get_callcvt (funtype);
5199 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5200 | IX86_CALLCVT_THISCALL)) != 0
5201 && ! stdarg_p (funtype))
5204 /* Lose any fake structure return argument if it is passed on the stack. */
5205 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5206 && !ix86_keep_aggregate_return_pointer (funtype))
5208 int nregs = ix86_function_regparm (funtype, fundecl);
5210 return GET_MODE_SIZE (Pmode);
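/* Illustrative example (standard stdcall semantics, not taken from the
   source): for

     __attribute__ ((stdcall)) int f (int a, int b);

   a 32-bit callee pops its own 8 bytes of arguments, typically with a
   "ret $8" instruction, so this function returns 8; a cdecl or stdarg
   function returns 0 and the caller pops instead.  */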
5216 /* Argument support functions. */
5218 /* Return true when a register may be used to pass function parameters. */
5220 ix86_function_arg_regno_p (int regno)
5223 const int *parm_regs;
5228 return (regno < REGPARM_MAX
5229 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5231 return (regno < REGPARM_MAX
5232 || (TARGET_MMX && MMX_REGNO_P (regno)
5233 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5234 || (TARGET_SSE && SSE_REGNO_P (regno)
5235 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5240 if (SSE_REGNO_P (regno) && TARGET_SSE)
5245 if (TARGET_SSE && SSE_REGNO_P (regno)
5246 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5250 /* TODO: The function should depend on the current function ABI, but
5251 builtins.c would need updating then.  Therefore we use the default ABI. */
5254 /* RAX is used as hidden argument to va_arg functions. */
5255 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5258 if (ix86_abi == MS_ABI)
5259 parm_regs = x86_64_ms_abi_int_parameter_registers;
5261 parm_regs = x86_64_int_parameter_registers;
5262 for (i = 0; i < (ix86_abi == MS_ABI
5263 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5264 if (regno == parm_regs[i])
5269 /* Return true if we do not know how to pass TYPE solely in registers. */
5272 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5274 if (must_pass_in_stack_var_size_or_pad (mode, type))
5277 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5278 The layout_type routine is crafty and tries to trick us into passing
5279 currently unsupported vector types on the stack by using TImode. */
5280 return (!TARGET_64BIT && mode == TImode
5281 && type && TREE_CODE (type) != VECTOR_TYPE);
5284 /* It returns the size, in bytes, of the area reserved for arguments passed
5285 in registers for the function represented by FNDECL, depending on the ABI used. */
5288 ix86_reg_parm_stack_space (const_tree fndecl)
5290 enum calling_abi call_abi = SYSV_ABI;
5291 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5292 call_abi = ix86_function_abi (fndecl);
5294 call_abi = ix86_function_type_abi (fndecl);
5295 if (TARGET_64BIT && call_abi == MS_ABI)
5300 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the call ABI used. */
5303 ix86_function_type_abi (const_tree fntype)
5305 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5307 enum calling_abi abi = ix86_abi;
5308 if (abi == SYSV_ABI)
5310 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5313 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5321 ix86_function_ms_hook_prologue (const_tree fn)
5323 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5325 if (decl_function_context (fn) != NULL_TREE)
5326 error_at (DECL_SOURCE_LOCATION (fn),
5327 "ms_hook_prologue is not compatible with nested function");
5334 static enum calling_abi
5335 ix86_function_abi (const_tree fndecl)
5339 return ix86_function_type_abi (TREE_TYPE (fndecl));
5342 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the call ABI used. */
5345 ix86_cfun_abi (void)
5349 return cfun->machine->call_abi;
5352 /* Write the extra assembler code needed to declare a function properly. */
5355 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5358 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5362 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5363 unsigned int filler_cc = 0xcccccccc;
5365 for (i = 0; i < filler_count; i += 4)
5366 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5369 #ifdef SUBTARGET_ASM_UNWIND_INIT
5370 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5373 ASM_OUTPUT_LABEL (asm_out_file, fname);
5375 /* Output magic byte marker, if hot-patch attribute is set. */
5380 /* leaq [%rsp + 0], %rsp */
5381 asm_fprintf (asm_out_file, ASM_BYTE
5382 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5386 /* movl.s %edi, %edi
5388 movl.s %esp, %ebp */
5389 asm_fprintf (asm_out_file, ASM_BYTE
5390 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5396 extern void init_regs (void);
5398 /* Implementation of the call ABI switching target hook.  The call register
5399 sets specific to FNDECL are set up here.  See also
5400 ix86_conditional_register_usage for more details. */
5402 ix86_call_abi_override (const_tree fndecl)
5404 if (fndecl == NULL_TREE)
5405 cfun->machine->call_abi = ix86_abi;
5407 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5410 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.  Avoid
5411 expensive re-initialization by init_regs each time we switch function context,
5412 since this is needed only during RTL expansion. */
5414 ix86_maybe_switch_abi (void)
5417 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5421 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5422 for a call to a function whose data type is FNTYPE.
5423 For a library call, FNTYPE is 0. */
5426 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5427 tree fntype, /* tree ptr for function decl */
5428 rtx libname, /* SYMBOL_REF of library name or 0 */
5432 struct cgraph_local_info *i;
5435 memset (cum, 0, sizeof (*cum));
5437 /* Initialize for the current callee. */
5440 cfun->machine->callee_pass_avx256_p = false;
5441 cfun->machine->callee_return_avx256_p = false;
5446 i = cgraph_local_info (fndecl);
5447 cum->call_abi = ix86_function_abi (fndecl);
5448 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5453 cum->call_abi = ix86_function_type_abi (fntype);
5455 fnret_type = TREE_TYPE (fntype);
5460 if (TARGET_VZEROUPPER && fnret_type)
5462 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5464 if (function_pass_avx256_p (fnret_value))
5466 /* The return value of this function uses 256bit AVX modes. */
5468 cfun->machine->callee_return_avx256_p = true;
5470 cfun->machine->caller_return_avx256_p = true;
5474 cum->caller = caller;
5476 /* Set up the number of registers to use for passing arguments. */
5478 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5479 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5480 "or subtarget optimization implying it");
5481 cum->nregs = ix86_regparm;
5484 cum->nregs = (cum->call_abi == SYSV_ABI
5485 ? X86_64_REGPARM_MAX
5486 : X86_64_MS_REGPARM_MAX);
5490 cum->sse_nregs = SSE_REGPARM_MAX;
5493 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5494 ? X86_64_SSE_REGPARM_MAX
5495 : X86_64_MS_SSE_REGPARM_MAX);
5499 cum->mmx_nregs = MMX_REGPARM_MAX;
5500 cum->warn_avx = true;
5501 cum->warn_sse = true;
5502 cum->warn_mmx = true;
5504 /* Because the type might mismatch between caller and callee, we need to
5505 use the actual type of the function for local calls.
5506 FIXME: cgraph_analyze can be told to actually record if the function uses
5507 va_start, so for local functions maybe_vaarg can be made more aggressive.
5509 FIXME: once the type system is fixed, we won't need this code anymore. */
5510 if (i && i->local && i->can_change_signature)
5511 fntype = TREE_TYPE (fndecl);
5512 cum->maybe_vaarg = (fntype
5513 ? (!prototype_p (fntype) || stdarg_p (fntype))
5518 /* If there are variable arguments, then we won't pass anything
5519 in registers in 32-bit mode. */
5520 if (stdarg_p (fntype))
5531 /* Use ecx and edx registers if function has fastcall attribute,
5532 else look for regparm information. */
5535 unsigned int ccvt = ix86_get_callcvt (fntype);
5536 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5539 cum->fastcall = 1; /* Same first register as in fastcall. */
5541 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5547 cum->nregs = ix86_function_regparm (fntype, fndecl);
5550 /* Set up the number of SSE registers used for passing SFmode
5551 and DFmode arguments. Warn for mismatching ABI. */
5552 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5556 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5557 But in the case of vector types, it is some vector mode.
5559 When we have only some of our vector isa extensions enabled, then there
5560 are some modes for which vector_mode_supported_p is false. For these
5561 modes, the generic vector support in gcc will choose some non-vector mode
5562 in order to implement the type. By computing the natural mode, we'll
5563 select the proper ABI location for the operand and not depend on whatever
5564 the middle-end decides to do with these vector types.
5566 The middle-end can't deal with vector types > 16 bytes.  In this
5567 case, we return the original mode and warn of the ABI change if CUM isn't NULL.  */
5570 static enum machine_mode
5571 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5573 enum machine_mode mode = TYPE_MODE (type);
5575 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5577 HOST_WIDE_INT size = int_size_in_bytes (type);
5578 if ((size == 8 || size == 16 || size == 32)
5579 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5580 && TYPE_VECTOR_SUBPARTS (type) > 1)
5582 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5584 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5585 mode = MIN_MODE_VECTOR_FLOAT;
5587 mode = MIN_MODE_VECTOR_INT;
5589 /* Get the mode which has this inner mode and number of units. */
5590 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5591 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5592 && GET_MODE_INNER (mode) == innermode)
5594 if (size == 32 && !TARGET_AVX)
5596 static bool warnedavx;
5603 warning (0, "AVX vector argument without AVX "
5604 "enabled changes the ABI");
5606 return TYPE_MODE (type);
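
/* Example of the natural-mode computation above (illustrative): for

       typedef int v4si __attribute__ ((vector_size (16)));

   generic code may give the type an integer mode such as TImode when
   SSE is disabled, but the GET_MODE_WIDER_MODE loop still finds
   V4SImode, so the argument lands at its proper ABI location.  A
   32-byte vector without AVX instead takes the warning path above and
   falls back to TYPE_MODE.  */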
5619 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5620 this may not agree with the mode that the type system has chosen for the
5621 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5622 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5625 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5630 if (orig_mode != BLKmode)
5631 tmp = gen_rtx_REG (orig_mode, regno);
5634 tmp = gen_rtx_REG (mode, regno);
5635 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5636 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5642 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5643 of this code is to classify each 8bytes of incoming argument by the register
5644 class and assign registers accordingly. */
5646 /* Return the union class of CLASS1 and CLASS2.
5647 See the x86-64 PS ABI for details. */
5649 static enum x86_64_reg_class
5650 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5652 /* Rule #1: If both classes are equal, this is the resulting class. */
5653 if (class1 == class2)
5656 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5658 if (class1 == X86_64_NO_CLASS)
5660 if (class2 == X86_64_NO_CLASS)
5663 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5664 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5665 return X86_64_MEMORY_CLASS;
5667 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5668 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5669 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5670 return X86_64_INTEGERSI_CLASS;
5671 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5672 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5673 return X86_64_INTEGER_CLASS;
5675 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5677 if (class1 == X86_64_X87_CLASS
5678 || class1 == X86_64_X87UP_CLASS
5679 || class1 == X86_64_COMPLEX_X87_CLASS
5680 || class2 == X86_64_X87_CLASS
5681 || class2 == X86_64_X87UP_CLASS
5682 || class2 == X86_64_COMPLEX_X87_CLASS)
5683 return X86_64_MEMORY_CLASS;
5685 /* Rule #6: Otherwise class SSE is used. */
5686 return X86_64_SSE_CLASS;
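
/* Worked example for the rules above (illustrative): for

       union u { int i; float f; };

   the single eightbyte is classified X86_64_INTEGERSI_CLASS by the int
   member and X86_64_SSESF_CLASS by the float member; the special first
   clause of rule #4 resolves the merge to X86_64_INTEGERSI_CLASS, so
   the union is passed in a general-purpose register.  */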
5689 /* Classify the argument of type TYPE and mode MODE.
5690 CLASSES will be filled by the register class used to pass each word
5691 of the operand. The number of words is returned. In case the parameter
5692 should be passed in memory, 0 is returned. As a special case for zero
5693 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5695 BIT_OFFSET is used internally for handling records; it gives the bit
5696 offset within the record, modulo 256, to avoid overflow cases.
5698 See the x86-64 PS ABI for details.
5702 classify_argument (enum machine_mode mode, const_tree type,
5703 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5705 HOST_WIDE_INT bytes =
5706 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5707 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5709 /* Variable sized entities are always passed/returned in memory. */
5713 if (mode != VOIDmode
5714 && targetm.calls.must_pass_in_stack (mode, type))
5717 if (type && AGGREGATE_TYPE_P (type))
5721 enum x86_64_reg_class subclasses[MAX_CLASSES];
5723 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5727 for (i = 0; i < words; i++)
5728 classes[i] = X86_64_NO_CLASS;
5730 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
5731    signal the memory class, so handle this as a special case. */
5734 classes[0] = X86_64_NO_CLASS;
5738 /* Classify each field of record and merge classes. */
5739 switch (TREE_CODE (type))
5742 /* And now merge the fields of structure. */
5743 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5745 if (TREE_CODE (field) == FIELD_DECL)
5749 if (TREE_TYPE (field) == error_mark_node)
5752 /* Bitfields are always classified as integer. Handle them
5753 early, since later code would consider them to be
5754 misaligned integers. */
5755 if (DECL_BIT_FIELD (field))
5757 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5758 i < ((int_bit_position (field) + (bit_offset % 64))
5759 + tree_low_cst (DECL_SIZE (field), 0)
5762 merge_classes (X86_64_INTEGER_CLASS,
5769 type = TREE_TYPE (field);
5771 /* Flexible array member is ignored. */
5772 if (TYPE_MODE (type) == BLKmode
5773 && TREE_CODE (type) == ARRAY_TYPE
5774 && TYPE_SIZE (type) == NULL_TREE
5775 && TYPE_DOMAIN (type) != NULL_TREE
5776 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5781 if (!warned && warn_psabi)
5784 inform (input_location,
5785 "the ABI of passing struct with"
5786 " a flexible array member has"
5787 " changed in GCC 4.4");
5791 num = classify_argument (TYPE_MODE (type), type,
5793 (int_bit_position (field)
5794 + bit_offset) % 256);
5797 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5798 for (i = 0; i < num && (i + pos) < words; i++)
5800 merge_classes (subclasses[i], classes[i + pos]);
5807 /* Arrays are handled as small records. */
5810 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5811 TREE_TYPE (type), subclasses, bit_offset);
5815 /* The partial classes are now full classes. */
5816 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5817 subclasses[0] = X86_64_SSE_CLASS;
5818 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5819 && !((bit_offset % 64) == 0 && bytes == 4))
5820 subclasses[0] = X86_64_INTEGER_CLASS;
5822 for (i = 0; i < words; i++)
5823 classes[i] = subclasses[i % num];
5828 case QUAL_UNION_TYPE:
5829 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
5831 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5833 if (TREE_CODE (field) == FIELD_DECL)
5837 if (TREE_TYPE (field) == error_mark_node)
5840 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5841 TREE_TYPE (field), subclasses,
5845 for (i = 0; i < num; i++)
5846 classes[i] = merge_classes (subclasses[i], classes[i]);
5857 /* When size > 16 bytes, if the first class isn't
5858    X86_64_SSE_CLASS or any of the others aren't
5859    X86_64_SSEUP_CLASS, everything should be passed in memory.  */
5861 if (classes[0] != X86_64_SSE_CLASS)
5864 for (i = 1; i < words; i++)
5865 if (classes[i] != X86_64_SSEUP_CLASS)
5869 /* Final merger cleanup. */
5870 for (i = 0; i < words; i++)
5872 /* If one class is MEMORY, everything should be passed in memory.  */
5874 if (classes[i] == X86_64_MEMORY_CLASS)
5877 /* The X86_64_SSEUP_CLASS should be always preceded by
5878 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5879 if (classes[i] == X86_64_SSEUP_CLASS
5880 && classes[i - 1] != X86_64_SSE_CLASS
5881 && classes[i - 1] != X86_64_SSEUP_CLASS)
5883 /* The first one should never be X86_64_SSEUP_CLASS. */
5884 gcc_assert (i != 0);
5885 classes[i] = X86_64_SSE_CLASS;
5888 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5889 everything should be passed in memory. */
5890 if (classes[i] == X86_64_X87UP_CLASS
5891 && (classes[i - 1] != X86_64_X87_CLASS))
5895 /* The first one should never be X86_64_X87UP_CLASS. */
5896 gcc_assert (i != 0);
5897 if (!warned && warn_psabi)
5900 inform (input_location,
5901 "the ABI of passing union with long double"
5902 " has changed in GCC 4.4");
5910 /* Compute the alignment needed.  We align all types to natural boundaries,
5911    with the exception of XFmode, which is aligned to 64 bits. */
5912 if (mode != VOIDmode && mode != BLKmode)
5914 int mode_alignment = GET_MODE_BITSIZE (mode);
5917 mode_alignment = 128;
5918 else if (mode == XCmode)
5919 mode_alignment = 256;
5920 if (COMPLEX_MODE_P (mode))
5921 mode_alignment /= 2;
5922 /* Misaligned fields are always returned in memory. */
5923 if (bit_offset % mode_alignment)
5927 /* For V1xx modes, just use the base mode.  */
5928 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5929 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5930 mode = GET_MODE_INNER (mode);
5932 /* Classification of atomic types. */
5937 classes[0] = X86_64_SSE_CLASS;
5940 classes[0] = X86_64_SSE_CLASS;
5941 classes[1] = X86_64_SSEUP_CLASS;
5951 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5955 classes[0] = X86_64_INTEGERSI_CLASS;
5958 else if (size <= 64)
5960 classes[0] = X86_64_INTEGER_CLASS;
5963 else if (size <= 64+32)
5965 classes[0] = X86_64_INTEGER_CLASS;
5966 classes[1] = X86_64_INTEGERSI_CLASS;
5969 else if (size <= 64+64)
5971 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5979 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5983 /* OImode shouldn't be used directly. */
5988 if (!(bit_offset % 64))
5989 classes[0] = X86_64_SSESF_CLASS;
5991 classes[0] = X86_64_SSE_CLASS;
5994 classes[0] = X86_64_SSEDF_CLASS;
5997 classes[0] = X86_64_X87_CLASS;
5998 classes[1] = X86_64_X87UP_CLASS;
6001 classes[0] = X86_64_SSE_CLASS;
6002 classes[1] = X86_64_SSEUP_CLASS;
6005 classes[0] = X86_64_SSE_CLASS;
6006 if (!(bit_offset % 64))
6012 if (!warned && warn_psabi)
6015 inform (input_location,
6016 "the ABI of passing structure with complex float"
6017 " member has changed in GCC 4.4");
6019 classes[1] = X86_64_SSESF_CLASS;
6023 classes[0] = X86_64_SSEDF_CLASS;
6024 classes[1] = X86_64_SSEDF_CLASS;
6027 classes[0] = X86_64_COMPLEX_X87_CLASS;
6030 /* These modes are larger than 16 bytes. */
6038 classes[0] = X86_64_SSE_CLASS;
6039 classes[1] = X86_64_SSEUP_CLASS;
6040 classes[2] = X86_64_SSEUP_CLASS;
6041 classes[3] = X86_64_SSEUP_CLASS;
6049 classes[0] = X86_64_SSE_CLASS;
6050 classes[1] = X86_64_SSEUP_CLASS;
6058 classes[0] = X86_64_SSE_CLASS;
6064 gcc_assert (VECTOR_MODE_P (mode));
6069 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6071 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6072 classes[0] = X86_64_INTEGERSI_CLASS;
6074 classes[0] = X86_64_INTEGER_CLASS;
6075 classes[1] = X86_64_INTEGER_CLASS;
6076 return 1 + (bytes > 8);
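
/* Worked example (illustrative): under the SysV x86-64 psABI,

       struct s { double d; long l; };

   occupies two eightbytes.  classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGER_CLASS,
   so as an argument the struct is split between an SSE register and a
   general-purpose register.  */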
6080 /* Examine the argument and set the number of registers required in each
6081    class.  Return 0 iff the parameter should be passed in memory. */
6083 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6084 int *int_nregs, int *sse_nregs)
6086 enum x86_64_reg_class regclass[MAX_CLASSES];
6087 int n = classify_argument (mode, type, regclass, 0);
6093 for (n--; n >= 0; n--)
6094 switch (regclass[n])
6096 case X86_64_INTEGER_CLASS:
6097 case X86_64_INTEGERSI_CLASS:
6100 case X86_64_SSE_CLASS:
6101 case X86_64_SSESF_CLASS:
6102 case X86_64_SSEDF_CLASS:
6105 case X86_64_NO_CLASS:
6106 case X86_64_SSEUP_CLASS:
6108 case X86_64_X87_CLASS:
6109 case X86_64_X87UP_CLASS:
6113 case X86_64_COMPLEX_X87_CLASS:
6114 return in_return ? 2 : 0;
6115 case X86_64_MEMORY_CLASS:
6121 /* Construct container for the argument used by GCC interface. See
6122 FUNCTION_ARG for the detailed description. */
6125 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6126 const_tree type, int in_return, int nintregs, int nsseregs,
6127 const int *intreg, int sse_regno)
6129 /* These static variables record whether each error has already been issued. */
6130 static bool issued_sse_arg_error;
6131 static bool issued_sse_ret_error;
6132 static bool issued_x87_ret_error;
6134 enum machine_mode tmpmode;
6136 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6137 enum x86_64_reg_class regclass[MAX_CLASSES];
6141 int needed_sseregs, needed_intregs;
6142 rtx exp[MAX_CLASSES];
6145 n = classify_argument (mode, type, regclass, 0);
6148 if (!examine_argument (mode, type, in_return, &needed_intregs,
6151 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6154 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6155 some less clueful developer tries to use floating-point anyway. */
6156 if (needed_sseregs && !TARGET_SSE)
6160 if (!issued_sse_ret_error)
6162 error ("SSE register return with SSE disabled");
6163 issued_sse_ret_error = true;
6166 else if (!issued_sse_arg_error)
6168 error ("SSE register argument with SSE disabled");
6169 issued_sse_arg_error = true;
6174 /* Likewise, error if the ABI requires us to return values in the
6175 x87 registers and the user specified -mno-80387. */
6176 if (!TARGET_80387 && in_return)
6177 for (i = 0; i < n; i++)
6178 if (regclass[i] == X86_64_X87_CLASS
6179 || regclass[i] == X86_64_X87UP_CLASS
6180 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6182 if (!issued_x87_ret_error)
6184 error ("x87 register return with x87 disabled");
6185 issued_x87_ret_error = true;
6190 /* First construct the simple cases.  Avoid SCmode, since we want to use
6191    a single register to pass this type. */
6192 if (n == 1 && mode != SCmode)
6193 switch (regclass[0])
6195 case X86_64_INTEGER_CLASS:
6196 case X86_64_INTEGERSI_CLASS:
6197 return gen_rtx_REG (mode, intreg[0]);
6198 case X86_64_SSE_CLASS:
6199 case X86_64_SSESF_CLASS:
6200 case X86_64_SSEDF_CLASS:
6201 if (mode != BLKmode)
6202 return gen_reg_or_parallel (mode, orig_mode,
6203 SSE_REGNO (sse_regno));
6205 case X86_64_X87_CLASS:
6206 case X86_64_COMPLEX_X87_CLASS:
6207 return gen_rtx_REG (mode, FIRST_STACK_REG);
6208 case X86_64_NO_CLASS:
6209 /* Zero sized array, struct or class. */
6214 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6215 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6216 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6218 && regclass[0] == X86_64_SSE_CLASS
6219 && regclass[1] == X86_64_SSEUP_CLASS
6220 && regclass[2] == X86_64_SSEUP_CLASS
6221 && regclass[3] == X86_64_SSEUP_CLASS
6223 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6226 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6227 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6228 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6229 && regclass[1] == X86_64_INTEGER_CLASS
6230 && (mode == CDImode || mode == TImode || mode == TFmode)
6231 && intreg[0] + 1 == intreg[1])
6232 return gen_rtx_REG (mode, intreg[0]);
6234 /* Otherwise figure out the entries of the PARALLEL. */
6235 for (i = 0; i < n; i++)
6239 switch (regclass[i])
6241 case X86_64_NO_CLASS:
6243 case X86_64_INTEGER_CLASS:
6244 case X86_64_INTEGERSI_CLASS:
6245 /* Merge TImodes on aligned occasions here too. */
6246 if (i * 8 + 8 > bytes)
6247 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6248 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6252 /* We've requested 24 bytes, which we don't have a mode for.  Use DImode. */
6253 if (tmpmode == BLKmode)
6255 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6256 gen_rtx_REG (tmpmode, *intreg),
6260 case X86_64_SSESF_CLASS:
6261 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6262 gen_rtx_REG (SFmode,
6263 SSE_REGNO (sse_regno)),
6267 case X86_64_SSEDF_CLASS:
6268 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6269 gen_rtx_REG (DFmode,
6270 SSE_REGNO (sse_regno)),
6274 case X86_64_SSE_CLASS:
6282 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6292 && regclass[1] == X86_64_SSEUP_CLASS
6293 && regclass[2] == X86_64_SSEUP_CLASS
6294 && regclass[3] == X86_64_SSEUP_CLASS);
6301 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6302 gen_rtx_REG (tmpmode,
6303 SSE_REGNO (sse_regno)),
6312 /* Empty aligned struct, union or class. */
6316 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6317 for (i = 0; i < nexps; i++)
6318 XVECEXP (ret, 0, i) = exp [i];
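
/* Illustrative result (not from the original source): for the struct
   used in the classification example above, construct_container builds

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   i.e. a PARALLEL whose EXPR_LISTs pair each hard register with the
   byte offset of the eightbyte it carries.  */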
6322 /* Update the data in CUM to advance over an argument of mode MODE
6323 and data type TYPE. (TYPE is null for libcalls where that information
6324 may not be available.) */
6327 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6328 const_tree type, HOST_WIDE_INT bytes,
6329 HOST_WIDE_INT words)
6345 cum->words += words;
6346 cum->nregs -= words;
6347 cum->regno += words;
6349 if (cum->nregs <= 0)
6357 /* OImode shouldn't be used directly. */
6361 if (cum->float_in_sse < 2)
6364 if (cum->float_in_sse < 1)
6381 if (!type || !AGGREGATE_TYPE_P (type))
6383 cum->sse_words += words;
6384 cum->sse_nregs -= 1;
6385 cum->sse_regno += 1;
6386 if (cum->sse_nregs <= 0)
6400 if (!type || !AGGREGATE_TYPE_P (type))
6402 cum->mmx_words += words;
6403 cum->mmx_nregs -= 1;
6404 cum->mmx_regno += 1;
6405 if (cum->mmx_nregs <= 0)
6416 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6417 const_tree type, HOST_WIDE_INT words, bool named)
6419 int int_nregs, sse_nregs;
6421 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6422 if (!named && VALID_AVX256_REG_MODE (mode))
6425 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6426 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6428 cum->nregs -= int_nregs;
6429 cum->sse_nregs -= sse_nregs;
6430 cum->regno += int_nregs;
6431 cum->sse_regno += sse_nregs;
6435 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6436 cum->words = (cum->words + align - 1) & ~(align - 1);
6437 cum->words += words;
6442 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6443 HOST_WIDE_INT words)
6445 /* Otherwise, this should be passed indirectly. */
6446 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6448 cum->words += words;
6456 /* Update the data in CUM to advance over an argument of mode MODE and
6457 data type TYPE. (TYPE is null for libcalls where that information
6458 may not be available.) */
6461 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6462 const_tree type, bool named)
6464 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6465 HOST_WIDE_INT bytes, words;
6467 if (mode == BLKmode)
6468 bytes = int_size_in_bytes (type);
6470 bytes = GET_MODE_SIZE (mode);
6471 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6474 mode = type_natural_mode (type, NULL);
6476 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6477 function_arg_advance_ms_64 (cum, bytes, words);
6478 else if (TARGET_64BIT)
6479 function_arg_advance_64 (cum, mode, type, words, named);
6481 function_arg_advance_32 (cum, mode, type, bytes, words);
6484 /* Define where to put the arguments to a function.
6485 Value is zero to push the argument on the stack,
6486 or a hard register in which to store the argument.
6488 MODE is the argument's machine mode.
6489 TYPE is the data type of the argument (as a tree).
6490 This is null for libcalls where that information may
6492 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6493 the preceding args and about the function being called.
6494 NAMED is nonzero if this argument is a named parameter
6495 (otherwise it is an extra parameter matching an ellipsis). */
6498 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6499 enum machine_mode orig_mode, const_tree type,
6500 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6502 static bool warnedsse, warnedmmx;
6504 /* Avoid the AL settings for the Unix64 ABI. */
6505 if (mode == VOIDmode)
6521 if (words <= cum->nregs)
6523 int regno = cum->regno;
6525 /* Fastcall allocates the first two DWORD (SImode) or
6526    smaller arguments to ECX and EDX if it isn't an aggregate type.  */
6532 || (type && AGGREGATE_TYPE_P (type)))
6535 /* ECX, not EAX, is the first allocated register. */
6536 if (regno == AX_REG)
6539 return gen_rtx_REG (mode, regno);
6544 if (cum->float_in_sse < 2)
6547 if (cum->float_in_sse < 1)
6551 /* In 32bit, we pass TImode in xmm registers. */
6558 if (!type || !AGGREGATE_TYPE_P (type))
6560 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6563 warning (0, "SSE vector argument without SSE enabled "
6567 return gen_reg_or_parallel (mode, orig_mode,
6568 cum->sse_regno + FIRST_SSE_REG);
6573 /* OImode shouldn't be used directly. */
6582 if (!type || !AGGREGATE_TYPE_P (type))
6585 return gen_reg_or_parallel (mode, orig_mode,
6586 cum->sse_regno + FIRST_SSE_REG);
6596 if (!type || !AGGREGATE_TYPE_P (type))
6598 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6601 warning (0, "MMX vector argument without MMX enabled "
6605 return gen_reg_or_parallel (mode, orig_mode,
6606 cum->mmx_regno + FIRST_MMX_REG);
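
/* Usage sketch (illustrative): with

       int __attribute__ ((fastcall)) f (int a, int b, int c);

   the 32-bit code above places a in %ecx and b in %edx (the first
   register is swapped from EAX to ECX as noted above), while c, and
   any aggregate argument, goes on the stack.  */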
6615 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6616 enum machine_mode orig_mode, const_tree type, bool named)
6618 /* Handle a hidden AL argument containing the number of SSE registers
6619    for varargs x86-64 functions. */
6620 if (mode == VOIDmode)
6621 return GEN_INT (cum->maybe_vaarg
6622 ? (cum->sse_nregs < 0
6623 ? X86_64_SSE_REGPARM_MAX
6638 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6644 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6646 &x86_64_int_parameter_registers [cum->regno],
6651 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6652 enum machine_mode orig_mode, bool named,
6653 HOST_WIDE_INT bytes)
6657 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6658    We use a value of -2 to specify that the current function call is MS_ABI. */
6659 if (mode == VOIDmode)
6660 return GEN_INT (-2);
6662 /* If we've run out of registers, it goes on the stack. */
6663 if (cum->nregs == 0)
6666 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6668 /* Only floating point modes are passed in anything but integer regs. */
6669 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6672 regno = cum->regno + FIRST_SSE_REG;
6677 /* Unnamed floating parameters are passed in both the
6678 SSE and integer registers. */
6679 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6680 t2 = gen_rtx_REG (mode, regno);
6681 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6682 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6683 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6686 /* Handle aggregate types passed in registers. */
6687 if (orig_mode == BLKmode)
6689 if (bytes > 0 && bytes <= 8)
6690 mode = (bytes > 4 ? DImode : SImode);
6691 if (mode == BLKmode)
6695 return gen_reg_or_parallel (mode, orig_mode, regno);
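
/* Summary of the Windows x64 convention implemented above
   (illustrative): the first four arguments occupy fixed slots, RCX,
   RDX, R8 and R9 for integers and XMM0-XMM3 for SFmode/DFmode, one
   slot per argument regardless of class.  For unnamed floating
   arguments the PARALLEL built above makes the caller copy the value
   into both the XMM slot and the matching integer register, which is
   what a varargs callee expects.  */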
6698 /* Return where to put the arguments to a function.
6699    Return zero to push the argument on the stack, or a hard register
        in which to store the argument.
6701 MODE is the argument's machine mode. TYPE is the data type of the
6702 argument. It is null for libcalls where that information may not be
6703 available. CUM gives information about the preceding args and about
6704 the function being called. NAMED is nonzero if this argument is a
6705 named parameter (otherwise it is an extra parameter matching an
6709 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6710 const_tree type, bool named)
6712 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6713 enum machine_mode mode = omode;
6714 HOST_WIDE_INT bytes, words;
6717 if (mode == BLKmode)
6718 bytes = int_size_in_bytes (type);
6720 bytes = GET_MODE_SIZE (mode);
6721 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6723 /* To simplify the code below, represent vector types with a vector mode
6724 even if MMX/SSE are not active. */
6725 if (type && TREE_CODE (type) == VECTOR_TYPE)
6726 mode = type_natural_mode (type, cum);
6728 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6729 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6730 else if (TARGET_64BIT)
6731 arg = function_arg_64 (cum, mode, omode, type, named);
6733 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6735 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6737 /* This argument uses 256bit AVX modes. */
6739 cfun->machine->callee_pass_avx256_p = true;
6741 cfun->machine->caller_pass_avx256_p = true;
6747 /* A C expression that indicates when an argument must be passed by
6748 reference. If nonzero for an argument, a copy of that argument is
6749 made in memory and a pointer to the argument is passed instead of
6750 the argument itself. The pointer is passed in whatever way is
6751 appropriate for passing a pointer to that type. */
6754 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6755 enum machine_mode mode ATTRIBUTE_UNUSED,
6756 const_tree type, bool named ATTRIBUTE_UNUSED)
6758 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6760 /* See Windows x64 Software Convention. */
6761 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6763 int msize = (int) GET_MODE_SIZE (mode);
6766 /* Arrays are passed by reference. */
6767 if (TREE_CODE (type) == ARRAY_TYPE)
6770 if (AGGREGATE_TYPE_P (type))
6772 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6773 are passed by reference. */
6774 msize = int_size_in_bytes (type);
6778 /* __m128 is passed by reference. */
6780 case 1: case 2: case 4: case 8:
6786 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6792 /* Return true when TYPE should be 128bit aligned for 32bit argument
6793 passing ABI. XXX: This function is obsolete and is only used for
6794 checking psABI compatibility with previous versions of GCC. */
6797 ix86_compat_aligned_value_p (const_tree type)
6799 enum machine_mode mode = TYPE_MODE (type);
6800 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6804 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6806 if (TYPE_ALIGN (type) < 128)
6809 if (AGGREGATE_TYPE_P (type))
6811 /* Walk the aggregates recursively. */
6812 switch (TREE_CODE (type))
6816 case QUAL_UNION_TYPE:
6820 /* Walk all the structure fields. */
6821 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6823 if (TREE_CODE (field) == FIELD_DECL
6824 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6831 /* Just for use in case some language passes arrays by value. */
6832 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6843 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6844 XXX: This function is obsolete and is only used for checking psABI
6845 compatibility with previous versions of GCC. */
6848 ix86_compat_function_arg_boundary (enum machine_mode mode,
6849 const_tree type, unsigned int align)
6851 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6852 natural boundaries. */
6853 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6855 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6856 make an exception for SSE modes since these require 128bit alignment.
6859 The handling here differs from field_alignment. ICC aligns MMX
6860 arguments to 4 byte boundaries, while structure fields are aligned
6861 to 8 byte boundaries. */
6864 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6865 align = PARM_BOUNDARY;
6869 if (!ix86_compat_aligned_value_p (type))
6870 align = PARM_BOUNDARY;
6873 if (align > BIGGEST_ALIGNMENT)
6874 align = BIGGEST_ALIGNMENT;
6878 /* Return true when TYPE should be 128bit aligned for the 32bit argument passing ABI.  */
6882 ix86_contains_aligned_value_p (const_tree type)
6884 enum machine_mode mode = TYPE_MODE (type);
6886 if (mode == XFmode || mode == XCmode)
6889 if (TYPE_ALIGN (type) < 128)
6892 if (AGGREGATE_TYPE_P (type))
6894 /* Walk the aggregates recursively. */
6895 switch (TREE_CODE (type))
6899 case QUAL_UNION_TYPE:
6903 /* Walk all the structure fields. */
6904 for (field = TYPE_FIELDS (type);
6906 field = DECL_CHAIN (field))
6908 if (TREE_CODE (field) == FIELD_DECL
6909 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
6916 /* Just for use in case some language passes arrays by value. */
6917 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6926 return TYPE_ALIGN (type) >= 128;
6931 /* Gives the alignment boundary, in bits, of an argument with the
6932 specified mode and type. */
6935 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6940 /* Since the main variant type is used for the call, convert the
6941    passed type to its main variant. */
6942 type = TYPE_MAIN_VARIANT (type);
6943 align = TYPE_ALIGN (type);
6946 align = GET_MODE_ALIGNMENT (mode);
6947 if (align < PARM_BOUNDARY)
6948 align = PARM_BOUNDARY;
6952 unsigned int saved_align = align;
6956 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6959 if (mode == XFmode || mode == XCmode)
6960 align = PARM_BOUNDARY;
6962 else if (!ix86_contains_aligned_value_p (type))
6963 align = PARM_BOUNDARY;
6966 align = PARM_BOUNDARY;
6971 && align != ix86_compat_function_arg_boundary (mode, type,
6975 inform (input_location,
6976 "The ABI for passing parameters with %d-byte"
6977 " alignment has changed in GCC 4.6",
6978 align / BITS_PER_UNIT);
6985 /* Return true if N is a possible register number of function value. */
6988 ix86_function_value_regno_p (const unsigned int regno)
6995 case FIRST_FLOAT_REG:
6996 /* TODO: The function should depend on the current function ABI, but
6997    builtins.c would need updating then.  Therefore we use the default ABI.  */
6999 if (TARGET_64BIT && ix86_abi == MS_ABI)
7001 return TARGET_FLOAT_RETURNS_IN_80387;
7007 if (TARGET_MACHO || TARGET_64BIT)
7015 /* Define how to find the value returned by a function.
7016 VALTYPE is the data type of the value (as a tree).
7017 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7018 otherwise, FUNC is 0. */
7021 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7022 const_tree fntype, const_tree fn)
7026 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7027 we normally prevent this case when mmx is not available. However
7028 some ABIs may require the result to be returned like DImode. */
7029 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7030 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7032 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7033 we prevent this case when sse is not available. However some ABIs
7034 may require the result to be returned like integer TImode. */
7035 else if (mode == TImode
7036 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7037 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7039 /* 32-byte vector modes in %ymm0. */
7040 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7041 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7043 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7044 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7045 regno = FIRST_FLOAT_REG;
7047 /* Most things go in %eax. */
7050 /* Override FP return register with %xmm0 for local functions when
7051 SSE math is enabled or for functions with sseregparm attribute. */
7052 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7054 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7055 if ((sse_level >= 1 && mode == SFmode)
7056 || (sse_level == 2 && mode == DFmode))
7057 regno = FIRST_SSE_REG;
7060 /* OImode shouldn't be used directly. */
7061 gcc_assert (mode != OImode);
7063 return gen_rtx_REG (orig_mode, regno);
7067 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7072 /* Handle libcalls, which don't provide a type node. */
7073 if (valtype == NULL)
7085 return gen_rtx_REG (mode, FIRST_SSE_REG);
7088 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7092 return gen_rtx_REG (mode, AX_REG);
7095 else if (POINTER_TYPE_P (valtype))
7097 /* Pointers are always returned in Pmode. */
7101 ret = construct_container (mode, orig_mode, valtype, 1,
7102 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7103 x86_64_int_return_registers, 0);
7105 /* For zero sized structures, construct_container returns NULL, but we
7106    need to keep the rest of the compiler happy by returning a meaningful value. */
7108 ret = gen_rtx_REG (orig_mode, AX_REG);
7114 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7116 unsigned int regno = AX_REG;
7120 switch (GET_MODE_SIZE (mode))
7123 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7124 && !COMPLEX_MODE_P (mode))
7125 regno = FIRST_SSE_REG;
7129 if (mode == SFmode || mode == DFmode)
7130 regno = FIRST_SSE_REG;
7136 return gen_rtx_REG (orig_mode, regno);
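
/* Illustrative summary of the return-register choices above: in
   32-bit code a float returns in %st(0) (unless -mno-fp-ret-in-387),
   a __m128 in %xmm0 and an int in %eax; on the SysV side of 64-bit
   code a double comes back in %xmm0, while the MS variant above uses
   %xmm0 only for SFmode/DFmode and 16-byte non-complex values.  */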
7140 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7141 enum machine_mode orig_mode, enum machine_mode mode)
7143 const_tree fn, fntype;
7146 if (fntype_or_decl && DECL_P (fntype_or_decl))
7147 fn = fntype_or_decl;
7148 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7150 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7151 return function_value_ms_64 (orig_mode, mode);
7152 else if (TARGET_64BIT)
7153 return function_value_64 (orig_mode, mode, valtype);
7155 return function_value_32 (orig_mode, mode, fntype, fn);
7159 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7160 bool outgoing ATTRIBUTE_UNUSED)
7162 enum machine_mode mode, orig_mode;
7164 orig_mode = TYPE_MODE (valtype);
7165 mode = type_natural_mode (valtype, NULL);
7166 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7169 /* Pointer function arguments and return values are promoted to Pmode. */
7171 static enum machine_mode
7172 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7173 int *punsignedp, const_tree fntype,
7176 if (type != NULL_TREE && POINTER_TYPE_P (type))
7178 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7181 return default_promote_function_mode (type, mode, punsignedp, fntype,
7186 ix86_libcall_value (enum machine_mode mode)
7188 return ix86_function_value_1 (NULL, NULL, mode, mode);
7191 /* Return true iff type is returned in memory. */
7193 static bool ATTRIBUTE_UNUSED
7194 return_in_memory_32 (const_tree type, enum machine_mode mode)
7198 if (mode == BLKmode)
7201 size = int_size_in_bytes (type);
7203 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7206 if (VECTOR_MODE_P (mode) || mode == TImode)
7208 /* User-created vectors small enough to fit in EAX. */
7212 /* MMX/3dNow values are returned in MM0,
7213    except when it doesn't exist or the ABI prescribes otherwise. */
7215 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7217 /* SSE values are returned in XMM0, except when it doesn't exist. */
7221 /* AVX values are returned in YMM0, except when it doesn't exist. */
7232 /* OImode shouldn't be used directly. */
7233 gcc_assert (mode != OImode);
7238 static bool ATTRIBUTE_UNUSED
7239 return_in_memory_64 (const_tree type, enum machine_mode mode)
7241 int needed_intregs, needed_sseregs;
7242 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7245 static bool ATTRIBUTE_UNUSED
7246 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7248 HOST_WIDE_INT size = int_size_in_bytes (type);
7250 /* __m128 is returned in xmm0. */
7251 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7252 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7255 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7256 return size != 1 && size != 2 && size != 4 && size != 8;
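
/* Examples for the rule above (illustrative): a 12-byte struct is
   returned in memory because 12 is not one of 1, 2, 4 or 8, whereas
   an 8-byte struct comes back in %rax and a 16-byte __m128 in %xmm0.  */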
7260 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7262 #ifdef SUBTARGET_RETURN_IN_MEMORY
7263 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7265 const enum machine_mode mode = type_natural_mode (type, NULL);
7269 if (ix86_function_type_abi (fntype) == MS_ABI)
7270 return return_in_memory_ms_64 (type, mode);
7272 return return_in_memory_64 (type, mode);
7275 return return_in_memory_32 (type, mode);
7279 /* When returning SSE vector types, we have a choice of either
7280 (1) being ABI incompatible with a -march switch, or
7281 (2) generating an error.
7282 Given no good solution, I think the safest thing is one warning.
7283 The user won't be able to use -Werror, but....
7285 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7286 called in response to actually generating a caller or callee that
7287 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7288 via aggregate_value_p for general type probing from tree-ssa. */
7291 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7293 static bool warnedsse, warnedmmx;
7295 if (!TARGET_64BIT && type)
7297 /* Look at the return type of the function, not the function type. */
7298 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7300 if (!TARGET_SSE && !warnedsse)
7303 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7306 warning (0, "SSE vector return without SSE enabled "
7311 if (!TARGET_MMX && !warnedmmx)
7313 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7316 warning (0, "MMX vector return without MMX enabled "
7326 /* Create the va_list data type. */
7328 /* Returns the calling-convention-specific va_list data type.
7329 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7332 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7334 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7336 /* For i386 we use a plain pointer to the argument area. */
7337 if (!TARGET_64BIT || abi == MS_ABI)
7338 return build_pointer_type (char_type_node);
7340 record = lang_hooks.types.make_type (RECORD_TYPE);
7341 type_decl = build_decl (BUILTINS_LOCATION,
7342 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7344 f_gpr = build_decl (BUILTINS_LOCATION,
7345 FIELD_DECL, get_identifier ("gp_offset"),
7346 unsigned_type_node);
7347 f_fpr = build_decl (BUILTINS_LOCATION,
7348 FIELD_DECL, get_identifier ("fp_offset"),
7349 unsigned_type_node);
7350 f_ovf = build_decl (BUILTINS_LOCATION,
7351 FIELD_DECL, get_identifier ("overflow_arg_area"),
7353 f_sav = build_decl (BUILTINS_LOCATION,
7354 FIELD_DECL, get_identifier ("reg_save_area"),
7357 va_list_gpr_counter_field = f_gpr;
7358 va_list_fpr_counter_field = f_fpr;
7360 DECL_FIELD_CONTEXT (f_gpr) = record;
7361 DECL_FIELD_CONTEXT (f_fpr) = record;
7362 DECL_FIELD_CONTEXT (f_ovf) = record;
7363 DECL_FIELD_CONTEXT (f_sav) = record;
7365 TYPE_STUB_DECL (record) = type_decl;
7366 TYPE_NAME (record) = type_decl;
7367 TYPE_FIELDS (record) = f_gpr;
7368 DECL_CHAIN (f_gpr) = f_fpr;
7369 DECL_CHAIN (f_fpr) = f_ovf;
7370 DECL_CHAIN (f_ovf) = f_sav;
7372 layout_type (record);
7374 /* The correct type is an array type of one element. */
7375 return build_array_type (record, build_index_type (size_zero_node));
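
/* For reference (illustrative rendering): the record built above is
   the familiar SysV x86-64 psABI definition

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   gp_offset counts bytes into reg_save_area for the next GP argument;
   fp_offset does the same for SSE arguments and starts past the GP
   block; overflow_arg_area points at the next stack-passed argument.  */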
7378 /* Setup the builtin va_list data type and for 64-bit the additional
7379 calling convention specific va_list data types. */
7382 ix86_build_builtin_va_list (void)
7384 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7386 /* Initialize abi specific va_list builtin types. */
7390 if (ix86_abi == MS_ABI)
7392 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7393 if (TREE_CODE (t) != RECORD_TYPE)
7394 t = build_variant_type_copy (t);
7395 sysv_va_list_type_node = t;
7400 if (TREE_CODE (t) != RECORD_TYPE)
7401 t = build_variant_type_copy (t);
7402 sysv_va_list_type_node = t;
7404 if (ix86_abi != MS_ABI)
7406 t = ix86_build_builtin_va_list_abi (MS_ABI);
7407 if (TREE_CODE (t) != RECORD_TYPE)
7408 t = build_variant_type_copy (t);
7409 ms_va_list_type_node = t;
7414 if (TREE_CODE (t) != RECORD_TYPE)
7415 t = build_variant_type_copy (t);
7416 ms_va_list_type_node = t;
7423 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7426 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7432 /* GPR size of varargs save area. */
7433 if (cfun->va_list_gpr_size)
7434 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7436 ix86_varargs_gpr_size = 0;
7438 /* FPR size of varargs save area. We don't need it if we don't pass
7439 anything in SSE registers. */
7440 if (TARGET_SSE && cfun->va_list_fpr_size)
7441 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7443 ix86_varargs_fpr_size = 0;
7445 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7448 save_area = frame_pointer_rtx;
7449 set = get_varargs_alias_set ();
7451 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7452 if (max > X86_64_REGPARM_MAX)
7453 max = X86_64_REGPARM_MAX;
7455 for (i = cum->regno; i < max; i++)
7457 mem = gen_rtx_MEM (Pmode,
7458 plus_constant (save_area, i * UNITS_PER_WORD));
7459 MEM_NOTRAP_P (mem) = 1;
7460 set_mem_alias_set (mem, set);
7461 emit_move_insn (mem, gen_rtx_REG (Pmode,
7462 x86_64_int_parameter_registers[i]));
7465 if (ix86_varargs_fpr_size)
7467 enum machine_mode smode;
7470 /* Now emit code to save SSE registers.  The AX parameter contains the
7471    number of SSE parameter registers used to call this function, though all we
7472 actually check here is the zero/non-zero status. */
7474 label = gen_label_rtx ();
7475 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7476 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7479 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7480 we used movdqa (i.e. TImode) instead? Perhaps even better would
7481 be if we could determine the real mode of the data, via a hook
7482 into pass_stdarg. Ignore all that for now. */
7484 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7485 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7487 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7488 if (max > X86_64_SSE_REGPARM_MAX)
7489 max = X86_64_SSE_REGPARM_MAX;
7491 for (i = cum->sse_regno; i < max; ++i)
7493 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7494 mem = gen_rtx_MEM (smode, mem);
7495 MEM_NOTRAP_P (mem) = 1;
7496 set_mem_alias_set (mem, set);
7497 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7499 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
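
/* Resulting layout (illustrative): the save area sits at
   frame_pointer_rtx; GP argument register i is stored at offset i*8
   (up to 6*8 = 48 bytes) and SSE register i at ix86_varargs_gpr_size
   + i*16.  The store of the SSE block is guarded by the AL test above,
   so callers that pass no floating arguments skip it at run time.  */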
7507 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7509 alias_set_type set = get_varargs_alias_set ();
7512 /* Reset to zero, as there might be a sysv vaarg used before.  */
7514 ix86_varargs_gpr_size = 0;
7515 ix86_varargs_fpr_size = 0;
7517 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7521 mem = gen_rtx_MEM (Pmode,
7522 plus_constant (virtual_incoming_args_rtx,
7523 i * UNITS_PER_WORD));
7524 MEM_NOTRAP_P (mem) = 1;
7525 set_mem_alias_set (mem, set);
7527 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7528 emit_move_insn (mem, reg);
7533 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7534 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7537 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7538 CUMULATIVE_ARGS next_cum;
7541 /* This argument doesn't appear to be used anymore. Which is good,
7542 because the old code here didn't suppress rtl generation. */
7543 gcc_assert (!no_rtl);
7548 fntype = TREE_TYPE (current_function_decl);
7550 /* For varargs, we do not want to skip the dummy va_dcl argument.
7551 For stdargs, we do want to skip the last named argument. */
7553 if (stdarg_p (fntype))
7554 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7557 if (cum->call_abi == MS_ABI)
7558 setup_incoming_varargs_ms_64 (&next_cum);
7560 setup_incoming_varargs_64 (&next_cum);
7563 /* Checks if TYPE is of kind va_list char *. */
7566 is_va_list_char_pointer (tree type)
7570 /* For 32-bit it is always true. */
7573 canonic = ix86_canonical_va_list_type (type);
7574 return (canonic == ms_va_list_type_node
7575 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7578 /* Implement va_start. */
7581 ix86_va_start (tree valist, rtx nextarg)
7583 HOST_WIDE_INT words, n_gpr, n_fpr;
7584 tree f_gpr, f_fpr, f_ovf, f_sav;
7585 tree gpr, fpr, ovf, sav, t;
7589 if (flag_split_stack
7590 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7592 unsigned int scratch_regno;
7594 /* When we are splitting the stack, we can't refer to the stack
7595 arguments using internal_arg_pointer, because they may be on
7596 the old stack. The split stack prologue will arrange to
7597 leave a pointer to the old stack arguments in a scratch
7598 register, which we here copy to a pseudo-register. The split
7599 stack prologue can't set the pseudo-register directly because
7600 it (the prologue) runs before any registers have been saved. */
7602 scratch_regno = split_stack_prologue_scratch_regno ();
7603 if (scratch_regno != INVALID_REGNUM)
7607 reg = gen_reg_rtx (Pmode);
7608 cfun->machine->split_stack_varargs_pointer = reg;
7611 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7615 push_topmost_sequence ();
7616 emit_insn_after (seq, entry_of_function ());
7617 pop_topmost_sequence ();
7621 /* Only the 64bit target needs something special. */
7622 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7624 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7625 std_expand_builtin_va_start (valist, nextarg);
7630 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7631 next = expand_binop (ptr_mode, add_optab,
7632 cfun->machine->split_stack_varargs_pointer,
7633 crtl->args.arg_offset_rtx,
7634 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7635 convert_move (va_r, next, 0);
7640 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7641 f_fpr = DECL_CHAIN (f_gpr);
7642 f_ovf = DECL_CHAIN (f_fpr);
7643 f_sav = DECL_CHAIN (f_ovf);
7645 valist = build_simple_mem_ref (valist);
7646 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7647 /* The following should be folded into the MEM_REF offset. */
7648 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7650 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7652 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7654 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7657 /* Count number of gp and fp argument registers used. */
7658 words = crtl->args.info.words;
7659 n_gpr = crtl->args.info.regno;
7660 n_fpr = crtl->args.info.sse_regno;
7662 if (cfun->va_list_gpr_size)
7664 type = TREE_TYPE (gpr);
7665 t = build2 (MODIFY_EXPR, type,
7666 gpr, build_int_cst (type, n_gpr * 8));
7667 TREE_SIDE_EFFECTS (t) = 1;
7668 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7671 if (TARGET_SSE && cfun->va_list_fpr_size)
7673 type = TREE_TYPE (fpr);
7674 t = build2 (MODIFY_EXPR, type, fpr,
7675 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7676 TREE_SIDE_EFFECTS (t) = 1;
7677 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7680 /* Find the overflow area. */
7681 type = TREE_TYPE (ovf);
7682 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7683 ovf_rtx = crtl->args.internal_arg_pointer;
7685 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7686 t = make_tree (type, ovf_rtx);
7688 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7689 t = build2 (MODIFY_EXPR, type, ovf, t);
7690 TREE_SIDE_EFFECTS (t) = 1;
7691 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7693 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7695 /* Find the register save area.
7696    The function prologue saves it right above the stack frame. */
7697 type = TREE_TYPE (sav);
7698 t = make_tree (type, frame_pointer_rtx);
7699 if (!ix86_varargs_gpr_size)
7700 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7701 t = build2 (MODIFY_EXPR, type, sav, t);
7702 TREE_SIDE_EFFECTS (t) = 1;
7703 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7707 /* Implement va_arg. */
7710 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7713 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7714 tree f_gpr, f_fpr, f_ovf, f_sav;
7715 tree gpr, fpr, ovf, sav, t;
7717 tree lab_false, lab_over = NULL_TREE;
7722 enum machine_mode nat_mode;
7723 unsigned int arg_boundary;
7725 /* Only the 64bit target needs something special. */
7726 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7727 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7729 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7730 f_fpr = DECL_CHAIN (f_gpr);
7731 f_ovf = DECL_CHAIN (f_fpr);
7732 f_sav = DECL_CHAIN (f_ovf);
7734 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7735 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7736 valist = build_va_arg_indirect_ref (valist);
7737 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7738 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7739 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7741 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7743 type = build_pointer_type (type);
7744 size = int_size_in_bytes (type);
7745 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7747 nat_mode = type_natural_mode (type, NULL);
7756 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7757 if (!TARGET_64BIT_MS_ABI)
7764 container = construct_container (nat_mode, TYPE_MODE (type),
7765 type, 0, X86_64_REGPARM_MAX,
7766 X86_64_SSE_REGPARM_MAX, intreg,
7771 /* Pull the value out of the saved registers. */
7773 addr = create_tmp_var (ptr_type_node, "addr");
7777 int needed_intregs, needed_sseregs;
7779 tree int_addr, sse_addr;
7781 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7782 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7784 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7786 need_temp = (!REG_P (container)
7787 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7788 || TYPE_ALIGN (type) > 128));
7790 /* In case we are passing a structure, verify that it is a consecutive block
7791    in the register save area.  If not, we need to do moves. */
7792 if (!need_temp && !REG_P (container))
7794 /* Verify that all registers are strictly consecutive.  */
7795 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7799 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7801 rtx slot = XVECEXP (container, 0, i);
7802 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7803 || INTVAL (XEXP (slot, 1)) != i * 16)
7811 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7813 rtx slot = XVECEXP (container, 0, i);
7814 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7815 || INTVAL (XEXP (slot, 1)) != i * 8)
7827 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7828 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7831 /* First ensure that we fit completely in registers. */
7834 t = build_int_cst (TREE_TYPE (gpr),
7835 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7836 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7837 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7838 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7839 gimplify_and_add (t, pre_p);
7843 t = build_int_cst (TREE_TYPE (fpr),
7844 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7845 + X86_64_REGPARM_MAX * 8);
7846 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7847 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7848 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7849 gimplify_and_add (t, pre_p);
7852 /* Compute index to start of area used for integer regs. */
7855 /* int_addr = gpr + sav; */
7856 t = fold_build_pointer_plus (sav, gpr);
7857 gimplify_assign (int_addr, t, pre_p);
7861 /* sse_addr = fpr + sav; */
7862 t = fold_build_pointer_plus (sav, fpr);
7863 gimplify_assign (sse_addr, t, pre_p);
7867 int i, prev_size = 0;
7868 tree temp = create_tmp_var (type, "va_arg_tmp");
7871 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7872 gimplify_assign (addr, t, pre_p);
7874 for (i = 0; i < XVECLEN (container, 0); i++)
7876 rtx slot = XVECEXP (container, 0, i);
7877 rtx reg = XEXP (slot, 0);
7878 enum machine_mode mode = GET_MODE (reg);
7884 tree dest_addr, dest;
7885 int cur_size = GET_MODE_SIZE (mode);
7887 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7888 prev_size = INTVAL (XEXP (slot, 1));
7889 if (prev_size + cur_size > size)
7891 cur_size = size - prev_size;
7892 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7893 if (mode == BLKmode)
7896 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7897 if (mode == GET_MODE (reg))
7898 addr_type = build_pointer_type (piece_type);
7900 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7902 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7905 if (SSE_REGNO_P (REGNO (reg)))
7907 src_addr = sse_addr;
7908 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7912 src_addr = int_addr;
7913 src_offset = REGNO (reg) * 8;
7915 src_addr = fold_convert (addr_type, src_addr);
7916 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
7918 dest_addr = fold_convert (daddr_type, addr);
7919 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
7920 if (cur_size == GET_MODE_SIZE (mode))
7922 src = build_va_arg_indirect_ref (src_addr);
7923 dest = build_va_arg_indirect_ref (dest_addr);
7925 gimplify_assign (dest, src, pre_p);
7930 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7931 3, dest_addr, src_addr,
7932 size_int (cur_size));
7933 gimplify_and_add (copy, pre_p);
7935 prev_size += cur_size;
7941 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7942 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7943 gimplify_assign (gpr, t, pre_p);
7948 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7949 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7950 gimplify_assign (fpr, t, pre_p);
7953 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7955 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7958 /* ... otherwise out of the overflow area. */
7960 /* When we align a parameter on the stack for the caller, if its
7961    alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7962    aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
7963    here with the caller. */
7964 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7965 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7966 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7968 /* Care for on-stack alignment if needed. */
7969 if (arg_boundary <= 64 || size == 0)
7973 HOST_WIDE_INT align = arg_boundary / 8;
7974 t = fold_build_pointer_plus_hwi (ovf, align - 1);
7975 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7976 build_int_cst (TREE_TYPE (t), -align));
7979 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7980 gimplify_assign (addr, t, pre_p);
7982 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
7983 gimplify_assign (unshare_expr (ovf), t, pre_p);
7986 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7988 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7989 addr = fold_convert (ptrtype, addr);
7992 addr = build_va_arg_indirect_ref (addr);
7993 return build_va_arg_indirect_ref (addr);
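
/* For a single integer argument, the gimple emitted above corresponds
   to this pseudo-C sketch (illustrative; the register path uses the
   (6 - needed_intregs + 1) * 8 == 48 bound computed above):

       if (ap->gp_offset >= 48)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = align (ap->overflow_arg_area, arg_boundary / 8);
       ap->overflow_arg_area = addr + rsize * UNITS_PER_WORD;
     done:
       fetch the value from *addr.  */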
7996 /* Return true if OPNUM's MEM should be matched
7997 in movabs* patterns. */
8000 ix86_check_movabs (rtx insn, int opnum)
8004 set = PATTERN (insn);
8005 if (GET_CODE (set) == PARALLEL)
8006 set = XVECEXP (set, 0, 0);
8007 gcc_assert (GET_CODE (set) == SET);
8008 mem = XEXP (set, opnum);
8009 while (GET_CODE (mem) == SUBREG)
8010 mem = SUBREG_REG (mem);
8011 gcc_assert (MEM_P (mem));
8012 return volatile_ok || !MEM_VOLATILE_P (mem);
8015 /* Initialize the table of extra 80387 mathematical constants. */
8018 init_ext_80387_constants (void)
8020 static const char * cst[5] =
8022 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8023 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8024 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8025 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8026 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8030 for (i = 0; i < 5; i++)
8032 real_from_string (&ext_80387_constants_table[i], cst[i]);
8033 /* Ensure each constant is rounded to XFmode precision. */
8034 real_convert (&ext_80387_constants_table[i],
8035 XFmode, &ext_80387_constants_table[i]);
8038 ext_80387_constants_init = 1;
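
/* How the table is consumed (illustrative; mapping as in this version
   of the backend): standard_80387_constant_p maps a matching
   CONST_DOUBLE to a small index, which standard_80387_constant_opcode
   then turns into the load opcode: 1->fldz, 2->fld1, 3->fldlg2,
   4->fldln2, 5->fldl2e, 6->fldl2t, 7->fldpi; indices 8 and 9, for
   -0.0 and -1.0, are later split into fldz;fchs and fld1;fchs
   sequences.  */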
8041 /* Return non-zero if the constant is something that
8042 can be loaded with a special instruction. */
8045 standard_80387_constant_p (rtx x)
8047 enum machine_mode mode = GET_MODE (x);
8051 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8054 if (x == CONST0_RTX (mode))
8056 if (x == CONST1_RTX (mode))
8059 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8061 /* For XFmode constants, try to find a special 80387 instruction when
8062 optimizing for size or on those CPUs that benefit from them. */
8064 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8068 if (! ext_80387_constants_init)
8069 init_ext_80387_constants ();
8071 for (i = 0; i < 5; i++)
8072 if (real_identical (&r, &ext_80387_constants_table[i]))
8076 /* A load of the constant -0.0 or -1.0 will be split into a
8077 fldz;fchs or fld1;fchs sequence.  */
8078 if (real_isnegzero (&r))
8080 if (real_identical (&r, &dconstm1))
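/* Sketch of the return-value encoding (added as a reading aid; the
   cases are reconstructed from this function and its callers, so treat
   them as informal): 1 stands for 0.0 (fldz), 2 for 1.0 (fld1), 3..7
   for the ext_80387_constants_table entries lg2, ln2, l2e, l2t and pi
   (fldlg2 .. fldpi), 8 for -0.0 (fldz; fchs) and 9 for -1.0
   (fld1; fchs).  */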
8086 /* Return the opcode of the special instruction to be used to load
8087 the constant X.  */
8090 standard_80387_constant_opcode (rtx x)
8092 switch (standard_80387_constant_p (x))
8116 /* Return the CONST_DOUBLE representing the 80387 constant that is
8117 loaded by the specified special instruction. The argument IDX
8118 matches the return value from standard_80387_constant_p. */
8121 standard_80387_constant_rtx (int idx)
8125 if (! ext_80387_constants_init)
8126 init_ext_80387_constants ();
8142 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8146 /* Return 1 if X is all 0s and 2 if X is all 1s
8147 in a supported SSE vector mode.  */
8150 standard_sse_constant_p (rtx x)
8152 enum machine_mode mode = GET_MODE (x);
8154 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8156 if (vector_all_ones_operand (x, mode))
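/* Added note: a return of 1 therefore means "all zeros", which the
   caller below materializes with a pxor/xorps-style idiom, and a
   return of 2 means "all ones", materialized with pcmpeqd of a
   register against itself; 0 means the constant is not special.  */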
8172 /* Return the opcode of the special instruction to be used to load
8173 the constant X.  */
8176 standard_sse_constant_opcode (rtx insn, rtx x)
8178 switch (standard_sse_constant_p (x))
8181 switch (get_attr_mode (insn))
8184 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8185 return "%vpxor\t%0, %d0";
8187 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8188 return "%vxorpd\t%0, %d0";
8190 return "%vxorps\t%0, %d0";
8193 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8194 return "vpxor\t%x0, %x0, %x0";
8196 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8197 return "vxorpd\t%x0, %x0, %x0";
8199 return "vxorps\t%x0, %x0, %x0";
8206 return "%vpcmpeqd\t%0, %d0";
8213 /* Returns true if OP contains a symbol reference.  */
8216 symbolic_reference_mentioned_p (rtx op)
8221 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8224 fmt = GET_RTX_FORMAT (GET_CODE (op));
8225 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8231 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8232 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8236 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8243 /* Return true if it is appropriate to emit `ret' instructions in the
8244 body of a function. Do this only if the epilogue is simple, needing a
8245 couple of insns. Prior to reloading, we can't tell how many registers
8246 must be saved, so return false then. Return false if there is no frame
8247 marker to de-allocate. */
8250 ix86_can_use_return_insn_p (void)
8252 struct ix86_frame frame;
8254 if (! reload_completed || frame_pointer_needed)
8257 /* Don't allow more than 32k pop, since that's all we can do
8258 with one instruction. */
8259 if (crtl->args.pops_args && crtl->args.size >= 32768)
8262 ix86_compute_frame_layout (&frame);
8263 return (frame.stack_pointer_offset == UNITS_PER_WORD
8264 && (frame.nregs + frame.nsseregs) == 0);
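/* Added note: the 32k limit above reflects the "ret imm16"
   instruction, whose pop count is a 16-bit immediate, so larger
   argument pops cannot be done with a single return insn.  */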
8267 /* Value should be nonzero if functions must have frame pointers.
8268 Zero means the frame pointer need not be set up (and parms may
8269 be accessed via the stack pointer) in functions that seem suitable. */
8272 ix86_frame_pointer_required (void)
8274 /* If we accessed previous frames, then the generated code expects
8275 to be able to access the saved ebp value in our frame. */
8276 if (cfun->machine->accesses_prev_frame)
8279 /* Several x86 OSes need a frame pointer for other reasons,
8280 usually pertaining to setjmp.  */
8281 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8284 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8285 turns off the frame pointer by default. Turn it back on now if
8286 we aren't compiling a leaf function.  */
8287 if (TARGET_OMIT_LEAF_FRAME_POINTER
8288 && (!current_function_is_leaf
8289 || ix86_current_function_calls_tls_descriptor))
8292 if (crtl->profile && !flag_fentry)
8298 /* Record that the current function accesses previous call frames. */
8301 ix86_setup_frame_addresses (void)
8303 cfun->machine->accesses_prev_frame = 1;
8306 #ifndef USE_HIDDEN_LINKONCE
8307 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8308 # define USE_HIDDEN_LINKONCE 1
8310 # define USE_HIDDEN_LINKONCE 0
8314 static int pic_labels_used;
8316 /* Fills in the label name that should be used for a pc thunk for
8317 the given register. */
8320 get_pc_thunk_name (char name[32], unsigned int regno)
8322 gcc_assert (!TARGET_64BIT);
8324 if (USE_HIDDEN_LINKONCE)
8325 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8327 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
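/* Example (added for illustration): for EBX this produces
   "__i686.get_pc_thunk.bx" in the hidden-linkonce case, and an
   internal label built from the "LPR" prefix and the register number
   otherwise.  */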
8331 /* This function generates code for -fpic that loads %ebx with
8332 the return address of the caller and then returns. */
8335 ix86_code_end (void)
8340 for (regno = AX_REG; regno <= SP_REG; regno++)
8345 if (!(pic_labels_used & (1 << regno)))
8348 get_pc_thunk_name (name, regno);
8350 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8351 get_identifier (name),
8352 build_function_type_list (void_type_node, NULL_TREE));
8353 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8354 NULL_TREE, void_type_node);
8355 TREE_PUBLIC (decl) = 1;
8356 TREE_STATIC (decl) = 1;
8361 switch_to_section (darwin_sections[text_coal_section]);
8362 fputs ("\t.weak_definition\t", asm_out_file);
8363 assemble_name (asm_out_file, name);
8364 fputs ("\n\t.private_extern\t", asm_out_file);
8365 assemble_name (asm_out_file, name);
8366 putc ('\n', asm_out_file);
8367 ASM_OUTPUT_LABEL (asm_out_file, name);
8368 DECL_WEAK (decl) = 1;
8372 if (USE_HIDDEN_LINKONCE)
8374 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8376 targetm.asm_out.unique_section (decl, 0);
8377 switch_to_section (get_named_section (decl, NULL, 0));
8379 targetm.asm_out.globalize_label (asm_out_file, name);
8380 fputs ("\t.hidden\t", asm_out_file);
8381 assemble_name (asm_out_file, name);
8382 putc ('\n', asm_out_file);
8383 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8387 switch_to_section (text_section);
8388 ASM_OUTPUT_LABEL (asm_out_file, name);
8391 DECL_INITIAL (decl) = make_node (BLOCK);
8392 current_function_decl = decl;
8393 init_function_start (decl);
8394 first_function_block_is_cold = false;
8395 /* Make sure unwind info is emitted for the thunk if needed. */
8396 final_start_function (emit_barrier (), asm_out_file, 1);
8398 /* Pad stack IP move with 4 instructions (two NOPs count
8399 as one instruction). */
8400 if (TARGET_PAD_SHORT_FUNCTION)
8405 fputs ("\tnop\n", asm_out_file);
8408 xops[0] = gen_rtx_REG (Pmode, regno);
8409 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8410 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8411 fputs ("\tret\n", asm_out_file);
8412 final_end_function ();
8413 init_insn_lengths ();
8414 free_after_compilation (cfun);
8416 current_function_decl = NULL;
8419 if (flag_split_stack)
8420 file_end_indicate_split_stack ();
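/* For reference (added comment), each thunk emitted above boils down
   to, e.g. for EBX:

       __i686.get_pc_thunk.bx:
               mov     (%esp), %ebx
               ret

   i.e. it copies its own return address, the address of the insn
   following the call, into the PIC register.  */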
8423 /* Emit code for the SET_GOT patterns. */
8426 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8432 if (TARGET_VXWORKS_RTP && flag_pic)
8434 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8435 xops[2] = gen_rtx_MEM (Pmode,
8436 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8437 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8439 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8440 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8441 an unadorned address. */
8442 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8443 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8444 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8448 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8452 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8454 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8457 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8458 is what will be referenced by the Mach-O PIC subsystem. */
8460 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8463 targetm.asm_out.internal_label (asm_out_file, "L",
8464 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8469 get_pc_thunk_name (name, REGNO (dest));
8470 pic_labels_used |= 1 << REGNO (dest);
8472 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8473 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8474 output_asm_insn ("call\t%X2", xops);
8475 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8476 is what will be referenced by the Mach-O PIC subsystem. */
8479 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8481 targetm.asm_out.internal_label (asm_out_file, "L",
8482 CODE_LABEL_NUMBER (label));
8487 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8492 /* Generate a "push" pattern for input ARG.  */
8497 struct machine_function *m = cfun->machine;
8499 if (m->fs.cfa_reg == stack_pointer_rtx)
8500 m->fs.cfa_offset += UNITS_PER_WORD;
8501 m->fs.sp_offset += UNITS_PER_WORD;
8503 return gen_rtx_SET (VOIDmode,
8505 gen_rtx_PRE_DEC (Pmode,
8506 stack_pointer_rtx)),
8510 /* Generate a "pop" pattern for input ARG.  */
8515 return gen_rtx_SET (VOIDmode,
8518 gen_rtx_POST_INC (Pmode,
8519 stack_pointer_rtx)));
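/* Reading aid (added comment): gen_push builds the RTL
   (set (mem (pre_dec sp)) arg) and gen_pop the mirror image
   (set arg (mem (post_inc sp))); gen_push additionally updates the
   CFA and SP offsets tracked in cfun->machine->fs.  */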
8522 /* Return >= 0 if there is an unused call-clobbered register available
8523 for the entire function. */
8526 ix86_select_alt_pic_regnum (void)
8528 if (current_function_is_leaf
8530 && !ix86_current_function_calls_tls_descriptor)
8533 /* Can't use the same register for both PIC and DRAP. */
8535 drap = REGNO (crtl->drap_reg);
8538 for (i = 2; i >= 0; --i)
8539 if (i != drap && !df_regs_ever_live_p (i))
8543 return INVALID_REGNUM;
8546 /* Return TRUE if we need to save REGNO. */
8549 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8551 if (pic_offset_table_rtx
8552 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8553 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8555 || crtl->calls_eh_return
8556 || crtl->uses_const_pool))
8557 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8559 if (crtl->calls_eh_return && maybe_eh_return)
8564 unsigned test = EH_RETURN_DATA_REGNO (i);
8565 if (test == INVALID_REGNUM)
8572 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8575 return (df_regs_ever_live_p (regno)
8576 && !call_used_regs[regno]
8577 && !fixed_regs[regno]
8578 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8581 /* Return the number of saved general purpose registers.  */
8584 ix86_nsaved_regs (void)
8589 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8590 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8595 /* Return the number of saved SSE registers.  */
8598 ix86_nsaved_sseregs (void)
8603 if (!TARGET_64BIT_MS_ABI)
8605 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8606 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8611 /* Given FROM and TO register numbers, say whether this elimination is
8612 allowed. If stack alignment is needed, we can only replace argument
8613 pointer with hard frame pointer, or replace frame pointer with stack
8614 pointer. Otherwise, frame pointer elimination is automatically
8615 handled and all other eliminations are valid. */
8618 ix86_can_eliminate (const int from, const int to)
8620 if (stack_realign_fp)
8621 return ((from == ARG_POINTER_REGNUM
8622 && to == HARD_FRAME_POINTER_REGNUM)
8623 || (from == FRAME_POINTER_REGNUM
8624 && to == STACK_POINTER_REGNUM));
8626 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8629 /* Return the offset between two registers, one to be eliminated, and the other
8630 its replacement, at the start of a routine. */
8633 ix86_initial_elimination_offset (int from, int to)
8635 struct ix86_frame frame;
8636 ix86_compute_frame_layout (&frame);
8638 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8639 return frame.hard_frame_pointer_offset;
8640 else if (from == FRAME_POINTER_REGNUM
8641 && to == HARD_FRAME_POINTER_REGNUM)
8642 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8645 gcc_assert (to == STACK_POINTER_REGNUM);
8647 if (from == ARG_POINTER_REGNUM)
8648 return frame.stack_pointer_offset;
8650 gcc_assert (from == FRAME_POINTER_REGNUM);
8651 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8655 /* In a dynamically-aligned function, we can't know the offset from
8656 stack pointer to frame pointer, so we must ensure that setjmp
8657 eliminates fp against the hard fp (%ebp) rather than trying to
8658 index from %esp up to the top of the frame across a gap that is
8659 of unknown (at compile-time) size. */
8661 ix86_builtin_setjmp_frame_value (void)
8663 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8666 /* When using -fsplit-stack, the allocation routines set a field in
8667 the TCB to the bottom of the stack plus this much space, measured
8668 in bytes.  */
8670 #define SPLIT_STACK_AVAILABLE 256
8672 /* Fill the ix86_frame structure describing the frame of the currently computed function.  */
8675 ix86_compute_frame_layout (struct ix86_frame *frame)
8677 unsigned int stack_alignment_needed;
8678 HOST_WIDE_INT offset;
8679 unsigned int preferred_alignment;
8680 HOST_WIDE_INT size = get_frame_size ();
8681 HOST_WIDE_INT to_allocate;
8683 frame->nregs = ix86_nsaved_regs ();
8684 frame->nsseregs = ix86_nsaved_sseregs ();
8686 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8687 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8689 /* The 64-bit MS ABI seems to require the stack to be 16-byte aligned at
8690 all times, except within function prologues and in leaf functions.  */
8691 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8692 && (!current_function_is_leaf || cfun->calls_alloca != 0
8693 || ix86_current_function_calls_tls_descriptor))
8695 preferred_alignment = 16;
8696 stack_alignment_needed = 16;
8697 crtl->preferred_stack_boundary = 128;
8698 crtl->stack_alignment_needed = 128;
8701 gcc_assert (!size || stack_alignment_needed);
8702 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8703 gcc_assert (preferred_alignment <= stack_alignment_needed);
8705 /* For SEH we have to limit the amount of code movement into the prologue.
8706 At present we do this via a BLOCKAGE, at which point there's very little
8707 scheduling that can be done, which means that there's very little point
8708 in doing anything except PUSHs. */
8710 cfun->machine->use_fast_prologue_epilogue = false;
8712 /* During reload iterations the number of registers saved can change.
8713 Recompute the value as needed.  Do not recompute when the number of
8714 registers didn't change, as reload calls this function multiple times
8715 and does not expect the decision to change within a single iteration.  */
8716 else if (!optimize_function_for_size_p (cfun)
8717 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8719 int count = frame->nregs;
8720 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8722 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8724 /* The fast prologue uses move instead of push to save registers. This
8725 is significantly longer, but also executes faster as modern hardware
8726 can execute the moves in parallel, but can't do that for push/pop.
8728 Be careful about choosing which prologue to emit: when the function
8729 takes many instructions to execute, we may use the slow version, as
8730 well as when the function is known to be outside a hot spot (this is
8731 known with feedback only).  Weight the size of the function by the
8732 number of registers to save, as it is cheap to use one or two push
8733 instructions but very slow to use many of them.  */
8735 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8736 if (node->frequency < NODE_FREQUENCY_NORMAL
8737 || (flag_branch_probabilities
8738 && node->frequency < NODE_FREQUENCY_HOT))
8739 cfun->machine->use_fast_prologue_epilogue = false;
8741 cfun->machine->use_fast_prologue_epilogue
8742 = !expensive_function_p (count);
8745 frame->save_regs_using_mov
8746 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8747 /* If static stack checking is enabled and done with probes,
8748 the registers need to be saved before allocating the frame. */
8749 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8751 /* Skip return address. */
8752 offset = UNITS_PER_WORD;
8754 /* Skip pushed static chain. */
8755 if (ix86_static_chain_on_stack)
8756 offset += UNITS_PER_WORD;
8758 /* Skip saved base pointer. */
8759 if (frame_pointer_needed)
8760 offset += UNITS_PER_WORD;
8761 frame->hfp_save_offset = offset;
8763 /* The traditional frame pointer location is at the top of the frame. */
8764 frame->hard_frame_pointer_offset = offset;
8766 /* Register save area */
8767 offset += frame->nregs * UNITS_PER_WORD;
8768 frame->reg_save_offset = offset;
8770 /* Align and set SSE register save area. */
8771 if (frame->nsseregs)
8773 /* The only ABI that has saved SSE registers (Win64) also has a
8774 16-byte aligned default stack, and thus we don't need to be
8775 within the re-aligned local stack frame to save them. */
8776 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8777 offset = (offset + 16 - 1) & -16;
8778 offset += frame->nsseregs * 16;
8780 frame->sse_reg_save_offset = offset;
8782 /* The re-aligned stack starts here. Values before this point are not
8783 directly comparable with values below this point. In order to make
8784 sure that no value happens to be the same before and after, force
8785 the alignment computation below to add a non-zero value. */
8786 if (stack_realign_fp)
8787 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8790 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8791 offset += frame->va_arg_size;
8793 /* Align start of frame for local function. */
8794 if (stack_realign_fp
8795 || offset != frame->sse_reg_save_offset
8797 || !current_function_is_leaf
8798 || cfun->calls_alloca
8799 || ix86_current_function_calls_tls_descriptor)
8800 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8802 /* Frame pointer points here. */
8803 frame->frame_pointer_offset = offset;
8807 /* Add the outgoing arguments area.  It can be skipped if we eliminated
8808 all the function calls as dead code.
8809 Skipping is however impossible when the function calls alloca: the
8810 alloca expander assumes that the last crtl->outgoing_args_size bytes
8811 of the stack frame are unused.  */
8812 if (ACCUMULATE_OUTGOING_ARGS
8813 && (!current_function_is_leaf || cfun->calls_alloca
8814 || ix86_current_function_calls_tls_descriptor))
8816 offset += crtl->outgoing_args_size;
8817 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8820 frame->outgoing_arguments_size = 0;
8822 /* Align stack boundary.  Only needed if we're calling another function
8823 or using alloca.  */
8824 if (!current_function_is_leaf || cfun->calls_alloca
8825 || ix86_current_function_calls_tls_descriptor)
8826 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8828 /* We've reached the end of the stack frame.  */
8829 frame->stack_pointer_offset = offset;
8831 /* Size prologue needs to allocate. */
8832 to_allocate = offset - frame->sse_reg_save_offset;
8834 if ((!to_allocate && frame->nregs <= 1)
8835 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8836 frame->save_regs_using_mov = false;
8838 if (ix86_using_red_zone ()
8839 && current_function_sp_is_unchanging
8840 && current_function_is_leaf
8841 && !ix86_current_function_calls_tls_descriptor)
8843 frame->red_zone_size = to_allocate;
8844 if (frame->save_regs_using_mov)
8845 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8846 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8847 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8850 frame->red_zone_size = 0;
8851 frame->stack_pointer_offset -= frame->red_zone_size;
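/* Worked example (added; assumes the usual x86-64 values
   RED_ZONE_SIZE == 128 and RED_ZONE_RESERVE == 8): with 40 bytes to
   allocate and two registers saved via moves, red_zone_size is
   40 + 2*8 == 56, below the 120-byte cap, so the whole allocation
   fits in the red zone and stack_pointer_offset shrinks by 56.  */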
8853 /* The SEH frame pointer location is near the bottom of the frame.
8854 This is enforced by the fact that the difference between the
8855 stack pointer and the frame pointer is limited to 240 bytes in
8856 the unwind data structure. */
8861 /* If we can leave the frame pointer where it is, do so. */
8862 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8863 if (diff > 240 || (diff & 15) != 0)
8865 /* Ideally we'd determine what portion of the local stack frame
8866 (within the constraint of the lowest 240) is most heavily used.
8867 But without that complication, simply bias the frame pointer
8868 by 128 bytes so as to maximize the amount of the local stack
8869 frame that is addressable with 8-bit offsets. */
8870 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8875 /* This is semi-inlined memory_address_length, but simplified
8876 since we know that we're always dealing with reg+offset, and
8877 to avoid having to create and discard all that rtl. */
8880 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8886 /* EBP and R13 cannot be encoded without an offset. */
8887 len = (regno == BP_REG || regno == R13_REG);
8889 else if (IN_RANGE (offset, -128, 127))
8892 /* ESP and R12 must be encoded with a SIB byte. */
8893 if (regno == SP_REG || regno == R12_REG)
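/* Examples (added for illustration, assuming the usual ModRM/SIB
   encodings): (EAX, 0) needs no displacement -> 0; (EBP, 0) still
   needs a disp8 -> 1; (EAX, 16) -> 1; (ESP, 16) -> 2 because of the
   extra SIB byte; offsets outside [-128, 127] need a disp32 -> 4.  */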
8899 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8900 The valid base registers are taken from CFUN->MACHINE->FS. */
8903 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8905 const struct machine_function *m = cfun->machine;
8906 rtx base_reg = NULL;
8907 HOST_WIDE_INT base_offset = 0;
8909 if (m->use_fast_prologue_epilogue)
8911 /* Choose the base register most likely to allow the most scheduling
8912 opportunities.  Generally FP is valid throughout the function,
8913 while DRAP must be reloaded within the epilogue. But choose either
8914 over the SP due to increased encoding size. */
8918 base_reg = hard_frame_pointer_rtx;
8919 base_offset = m->fs.fp_offset - cfa_offset;
8921 else if (m->fs.drap_valid)
8923 base_reg = crtl->drap_reg;
8924 base_offset = 0 - cfa_offset;
8926 else if (m->fs.sp_valid)
8928 base_reg = stack_pointer_rtx;
8929 base_offset = m->fs.sp_offset - cfa_offset;
8934 HOST_WIDE_INT toffset;
8937 /* Choose the base register with the smallest address encoding.
8938 With a tie, choose FP > DRAP > SP. */
8941 base_reg = stack_pointer_rtx;
8942 base_offset = m->fs.sp_offset - cfa_offset;
8943 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8945 if (m->fs.drap_valid)
8947 toffset = 0 - cfa_offset;
8948 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8951 base_reg = crtl->drap_reg;
8952 base_offset = toffset;
8958 toffset = m->fs.fp_offset - cfa_offset;
8959 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8962 base_reg = hard_frame_pointer_rtx;
8963 base_offset = toffset;
8968 gcc_assert (base_reg != NULL);
8970 return plus_constant (base_reg, base_offset);
8973 /* Emit code to save registers in the prologue. */
8976 ix86_emit_save_regs (void)
8981 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8982 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8984 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8985 RTX_FRAME_RELATED_P (insn) = 1;
8989 /* Emit a single register save at CFA - CFA_OFFSET. */
8992 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8993 HOST_WIDE_INT cfa_offset)
8995 struct machine_function *m = cfun->machine;
8996 rtx reg = gen_rtx_REG (mode, regno);
8997 rtx mem, addr, base, insn;
8999 addr = choose_baseaddr (cfa_offset);
9000 mem = gen_frame_mem (mode, addr);
9002 /* For SSE saves, we need to indicate the 128-bit alignment. */
9003 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9005 insn = emit_move_insn (mem, reg);
9006 RTX_FRAME_RELATED_P (insn) = 1;
9009 if (GET_CODE (base) == PLUS)
9010 base = XEXP (base, 0);
9011 gcc_checking_assert (REG_P (base));
9013 /* When saving registers into a re-aligned local stack frame, avoid
9014 any tricky guessing by dwarf2out. */
9015 if (m->fs.realigned)
9017 gcc_checking_assert (stack_realign_drap);
9019 if (regno == REGNO (crtl->drap_reg))
9021 /* A bit of a hack. We force the DRAP register to be saved in
9022 the re-aligned stack frame, which provides us with a copy
9023 of the CFA that will last past the prologue. Install it. */
9024 gcc_checking_assert (cfun->machine->fs.fp_valid);
9025 addr = plus_constant (hard_frame_pointer_rtx,
9026 cfun->machine->fs.fp_offset - cfa_offset);
9027 mem = gen_rtx_MEM (mode, addr);
9028 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9032 /* The frame pointer is a stable reference within the
9033 aligned frame. Use it. */
9034 gcc_checking_assert (cfun->machine->fs.fp_valid);
9035 addr = plus_constant (hard_frame_pointer_rtx,
9036 cfun->machine->fs.fp_offset - cfa_offset);
9037 mem = gen_rtx_MEM (mode, addr);
9038 add_reg_note (insn, REG_CFA_EXPRESSION,
9039 gen_rtx_SET (VOIDmode, mem, reg));
9043 /* The memory may not be relative to the current CFA register,
9044 which means that we may need to generate a new pattern for
9045 use by the unwind info. */
9046 else if (base != m->fs.cfa_reg)
9048 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9049 mem = gen_rtx_MEM (mode, addr);
9050 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9054 /* Emit code to save registers using MOV insns.
9055 First register is stored at CFA - CFA_OFFSET. */
9057 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9061 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9062 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9064 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9065 cfa_offset -= UNITS_PER_WORD;
9069 /* Emit code to save SSE registers using MOV insns.
9070 First register is stored at CFA - CFA_OFFSET. */
9072 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9076 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9077 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9079 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9084 static GTY(()) rtx queued_cfa_restores;
9086 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9087 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9088 Don't add the note if the previously saved value will be left untouched
9089 within the stack red zone until return, as unwinders can find the same
9090 value in the register and on the stack.  */
9093 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9095 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9100 add_reg_note (insn, REG_CFA_RESTORE, reg);
9101 RTX_FRAME_RELATED_P (insn) = 1;
9105 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9108 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9111 ix86_add_queued_cfa_restore_notes (rtx insn)
9114 if (!queued_cfa_restores)
9116 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9118 XEXP (last, 1) = REG_NOTES (insn);
9119 REG_NOTES (insn) = queued_cfa_restores;
9120 queued_cfa_restores = NULL_RTX;
9121 RTX_FRAME_RELATED_P (insn) = 1;
9124 /* Expand prologue or epilogue stack adjustment.
9125 The pattern exists to put a dependency on all ebp-based memory accesses.
9126 STYLE should be negative if instructions should be marked as frame related,
9127 zero if the %r11 register is live and cannot be freely used, and positive
9128 otherwise.  */
9131 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9132 int style, bool set_cfa)
9134 struct machine_function *m = cfun->machine;
9136 bool add_frame_related_expr = false;
9139 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9140 else if (x86_64_immediate_operand (offset, DImode))
9141 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9145 /* r11 is used by indirect sibcall return as well, set before the
9146 epilogue and used after the epilogue. */
9148 tmp = gen_rtx_REG (DImode, R11_REG);
9151 gcc_assert (src != hard_frame_pointer_rtx
9152 && dest != hard_frame_pointer_rtx);
9153 tmp = hard_frame_pointer_rtx;
9155 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9157 add_frame_related_expr = true;
9159 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9162 insn = emit_insn (insn);
9164 ix86_add_queued_cfa_restore_notes (insn);
9170 gcc_assert (m->fs.cfa_reg == src);
9171 m->fs.cfa_offset += INTVAL (offset);
9172 m->fs.cfa_reg = dest;
9174 r = gen_rtx_PLUS (Pmode, src, offset);
9175 r = gen_rtx_SET (VOIDmode, dest, r);
9176 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9177 RTX_FRAME_RELATED_P (insn) = 1;
9181 RTX_FRAME_RELATED_P (insn) = 1;
9182 if (add_frame_related_expr)
9184 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9185 r = gen_rtx_SET (VOIDmode, dest, r);
9186 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9190 if (dest == stack_pointer_rtx)
9192 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9193 bool valid = m->fs.sp_valid;
9195 if (src == hard_frame_pointer_rtx)
9197 valid = m->fs.fp_valid;
9198 ooffset = m->fs.fp_offset;
9200 else if (src == crtl->drap_reg)
9202 valid = m->fs.drap_valid;
9207 /* Else there are two possibilities: SP itself, which we set
9208 up as the default above, or EH_RETURN_STACKADJ_RTX, for which
9209 this is taken care of by hand along the eh_return path.  */
9210 gcc_checking_assert (src == stack_pointer_rtx
9211 || offset == const0_rtx);
9214 m->fs.sp_offset = ooffset - INTVAL (offset);
9215 m->fs.sp_valid = valid;
9219 /* Find an available register to be used as the dynamic realign argument
9220 pointer register.  Such a register will be written in the prologue and
9221 used at the beginning of the body, so it must not be
9222 1. a parameter passing register;
9223 2. the GOT pointer.
9224 We reuse the static-chain register if it is available.  Otherwise, we
9225 use DI for i386 and R13 for x86-64.  We chose R13 since it has a
9226 longer encoding.
9228 Return: the regno of the chosen register.  */
9231 find_drap_reg (void)
9233 tree decl = cfun->decl;
9237 /* Use R13 for a nested function or a function that needs a static chain.
9238 Since a function with a tail call may use any caller-saved
9239 registers in its epilogue, DRAP must not use a caller-saved
9240 register in that case.  */
9241 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9248 /* Use DI for a nested function or a function that needs a static chain.
9249 Since a function with a tail call may use any caller-saved
9250 registers in its epilogue, DRAP must not use a caller-saved
9251 register in that case.  */
9252 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9255 /* Reuse the static chain register if it isn't used for parameter
9256 passing.  */
9257 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9259 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9260 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9267 /* Return minimum incoming stack alignment. */
9270 ix86_minimum_incoming_stack_boundary (bool sibcall)
9272 unsigned int incoming_stack_boundary;
9274 /* Prefer the one specified at command line. */
9275 if (ix86_user_incoming_stack_boundary)
9276 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9277 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9278 if -mstackrealign is used, this isn't a sibcall check, and the
9279 estimated stack alignment is 128 bits.  */
9282 && ix86_force_align_arg_pointer
9283 && crtl->stack_alignment_estimated == 128)
9284 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9286 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9288 /* Incoming stack alignment can be changed on individual functions
9289 via force_align_arg_pointer attribute. We use the smallest
9290 incoming stack boundary. */
9291 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9292 && lookup_attribute (ix86_force_align_arg_pointer_string,
9293 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9294 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9296 /* The incoming stack frame has to be aligned at least at
9297 parm_stack_boundary. */
9298 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9299 incoming_stack_boundary = crtl->parm_stack_boundary;
9301 /* The stack at the entry of main is aligned by the runtime.  We use
9302 the smallest incoming stack boundary.  */
9303 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9304 && DECL_NAME (current_function_decl)
9305 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9306 && DECL_FILE_SCOPE_P (current_function_decl))
9307 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9309 return incoming_stack_boundary;
9312 /* Update incoming stack boundary and estimated stack alignment. */
9315 ix86_update_stack_boundary (void)
9317 ix86_incoming_stack_boundary
9318 = ix86_minimum_incoming_stack_boundary (false);
9320 /* x86_64 varargs need 16-byte stack alignment for the register save
9321 area.  */
9324 && crtl->stack_alignment_estimated < 128)
9325 crtl->stack_alignment_estimated = 128;
9328 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9329 needed or an rtx for DRAP otherwise. */
9332 ix86_get_drap_rtx (void)
9334 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9335 crtl->need_drap = true;
9337 if (stack_realign_drap)
9339 /* Assign DRAP to vDRAP and return vDRAP.  */
9340 unsigned int regno = find_drap_reg ();
9345 arg_ptr = gen_rtx_REG (Pmode, regno);
9346 crtl->drap_reg = arg_ptr;
9349 drap_vreg = copy_to_reg (arg_ptr);
9353 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9356 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9357 RTX_FRAME_RELATED_P (insn) = 1;
9365 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9368 ix86_internal_arg_pointer (void)
9370 return virtual_incoming_args_rtx;
9373 struct scratch_reg {
9378 /* Return a short-lived scratch register for use on function entry.
9379 In 32-bit mode, it is valid only after the registers are saved
9380 in the prologue. This register must be released by means of
9381 release_scratch_register_on_entry once it is dead. */
9384 get_scratch_register_on_entry (struct scratch_reg *sr)
9392 /* We always use R11 in 64-bit mode. */
9397 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9399 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9400 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9401 int regparm = ix86_function_regparm (fntype, decl);
9403 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9405 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9406 for the static chain register. */
9407 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9408 && drap_regno != AX_REG)
9410 else if (regparm < 2 && drap_regno != DX_REG)
9412 /* ecx is the static chain register. */
9413 else if (regparm < 3 && !fastcall_p && !static_chain_p
9414 && drap_regno != CX_REG)
9416 else if (ix86_save_reg (BX_REG, true))
9418 /* esi is the static chain register. */
9419 else if (!(regparm == 3 && static_chain_p)
9420 && ix86_save_reg (SI_REG, true))
9422 else if (ix86_save_reg (DI_REG, true))
9426 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9431 sr->reg = gen_rtx_REG (Pmode, regno);
9434 rtx insn = emit_insn (gen_push (sr->reg));
9435 RTX_FRAME_RELATED_P (insn) = 1;
9439 /* Release a scratch register obtained from the preceding function. */
9442 release_scratch_register_on_entry (struct scratch_reg *sr)
9446 rtx x, insn = emit_insn (gen_pop (sr->reg));
9448 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9449 RTX_FRAME_RELATED_P (insn) = 1;
9450 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9451 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9452 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9456 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
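/* Added note: with the usual default STACK_CHECK_PROBE_INTERVAL_EXP
   of 12 (an assumption; the exponent is configurable) PROBE_INTERVAL
   is 4096 bytes, i.e. one probe per page.  */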
9458 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9461 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9463 /* We skip the probe for the first interval + a small dope of 4 words and
9464 probe that many bytes past the specified size to maintain a protection
9465 area at the bottom of the stack.  */
9466 const int dope = 4 * UNITS_PER_WORD;
9467 rtx size_rtx = GEN_INT (size), last;
9469 /* See if we have a constant small number of probes to generate. If so,
9470 that's the easy case. The run-time loop is made up of 11 insns in the
9471 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9472 for n # of intervals. */
9473 if (size <= 5 * PROBE_INTERVAL)
9475 HOST_WIDE_INT i, adjust;
9476 bool first_probe = true;
9478 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9479 values of N from 1 until it exceeds SIZE. If only one probe is
9480 needed, this will not generate any code. Then adjust and probe
9481 to PROBE_INTERVAL + SIZE. */
9482 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9486 adjust = 2 * PROBE_INTERVAL + dope;
9487 first_probe = false;
9490 adjust = PROBE_INTERVAL;
9492 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9493 plus_constant (stack_pointer_rtx, -adjust)));
9494 emit_stack_probe (stack_pointer_rtx);
9498 adjust = size + PROBE_INTERVAL + dope;
9500 adjust = size + PROBE_INTERVAL - i;
9502 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9503 plus_constant (stack_pointer_rtx, -adjust)));
9504 emit_stack_probe (stack_pointer_rtx);
9506 /* Adjust back to account for the additional first interval. */
9507 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9508 plus_constant (stack_pointer_rtx,
9509 PROBE_INTERVAL + dope)));
9512 /* Otherwise, do the same as above, but in a loop. Note that we must be
9513 extra careful with variables wrapping around because we might be at
9514 the very top (or the very bottom) of the address space and we have
9515 to be able to handle this case properly; in particular, we use an
9516 equality test for the loop condition. */
9519 HOST_WIDE_INT rounded_size;
9520 struct scratch_reg sr;
9522 get_scratch_register_on_entry (&sr);
9525 /* Step 1: round SIZE to the previous multiple of the interval. */
9527 rounded_size = size & -PROBE_INTERVAL;
9530 /* Step 2: compute initial and final value of the loop counter. */
9532 /* SP = SP_0 + PROBE_INTERVAL. */
9533 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9534 plus_constant (stack_pointer_rtx,
9535 - (PROBE_INTERVAL + dope))));
9537 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9538 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9539 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9540 gen_rtx_PLUS (Pmode, sr.reg,
9541 stack_pointer_rtx)));
9544 /* Step 3: the loop
9546 while (SP != LAST_ADDR)
9547 {
9548 SP = SP + PROBE_INTERVAL
9549 probe at SP
9550 }
9552 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9553 values of N from 1 until it is equal to ROUNDED_SIZE.  */
9555 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9558 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9559 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9561 if (size != rounded_size)
9563 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9564 plus_constant (stack_pointer_rtx,
9565 rounded_size - size)));
9566 emit_stack_probe (stack_pointer_rtx);
9569 /* Adjust back to account for the additional first interval. */
9570 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9571 plus_constant (stack_pointer_rtx,
9572 PROBE_INTERVAL + dope)));
9574 release_scratch_register_on_entry (&sr);
9577 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9579 /* Even if the stack pointer isn't the CFA register, we need to correctly
9580 describe the adjustments made to it, in particular differentiate the
9581 frame-related ones from the frame-unrelated ones. */
9584 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9585 XVECEXP (expr, 0, 0)
9586 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9587 plus_constant (stack_pointer_rtx, -size));
9588 XVECEXP (expr, 0, 1)
9589 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9590 plus_constant (stack_pointer_rtx,
9591 PROBE_INTERVAL + dope + size));
9592 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9593 RTX_FRAME_RELATED_P (last) = 1;
9595 cfun->machine->fs.sp_offset += size;
9598 /* Make sure nothing is scheduled before we are done. */
9599 emit_insn (gen_blockage ());
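/* Worked example (added; assumes PROBE_INTERVAL == 4096 and 8-byte
   words, so dope == 32): for size == 10000 the constant branch above
   emits
       sub  $8224, %rsp    (2*4096 + 32)           probe
       sub  $4096, %rsp                            probe
       sub  $1808, %rsp    (10000 + 4096 - 12288)  probe
       add  $4128, %rsp    (4096 + 32)
   a net adjustment of exactly -10000 with a probe in every page of
   the newly allocated region.  */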
9602 /* Adjust the stack pointer up to REG while probing it. */
9605 output_adjust_stack_and_probe (rtx reg)
9607 static int labelno = 0;
9608 char loop_lab[32], end_lab[32];
9611 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9612 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9614 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9616 /* Jump to END_LAB if SP == LAST_ADDR. */
9617 xops[0] = stack_pointer_rtx;
9619 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9620 fputs ("\tje\t", asm_out_file);
9621 assemble_name_raw (asm_out_file, end_lab);
9622 fputc ('\n', asm_out_file);
9624 /* SP = SP + PROBE_INTERVAL. */
9625 xops[1] = GEN_INT (PROBE_INTERVAL);
9626 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9629 xops[1] = const0_rtx;
9630 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9632 fprintf (asm_out_file, "\tjmp\t");
9633 assemble_name_raw (asm_out_file, loop_lab);
9634 fputc ('\n', asm_out_file);
9636 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
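/* For reference (added comment), the loop printed above looks like
   the following in AT&T syntax, assuming PROBE_INTERVAL == 4096 and
   %eax holding LAST_ADDR, with label names along the lines of
   .LPSRL0/.LPSRE0:

       .LPSRL0: cmpl %eax, %esp
                je   .LPSRE0
                subl $4096, %esp
                orl  $0, (%esp)
                jmp  .LPSRL0
       .LPSRE0:

   The "orl $0" is the probe: a write that faults if a guard page has
   been reached.  */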
9641 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9642 inclusive. These are offsets from the current stack pointer. */
9645 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9647 /* See if we have a constant small number of probes to generate. If so,
9648 that's the easy case. The run-time loop is made up of 7 insns in the
9649 generic case while the compile-time loop is made up of n insns for n #
9650 of intervals.  */
9651 if (size <= 7 * PROBE_INTERVAL)
9655 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9656 it exceeds SIZE. If only one probe is needed, this will not
9657 generate any code. Then probe at FIRST + SIZE. */
9658 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9659 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9661 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9664 /* Otherwise, do the same as above, but in a loop. Note that we must be
9665 extra careful with variables wrapping around because we might be at
9666 the very top (or the very bottom) of the address space and we have
9667 to be able to handle this case properly; in particular, we use an
9668 equality test for the loop condition. */
9671 HOST_WIDE_INT rounded_size, last;
9672 struct scratch_reg sr;
9674 get_scratch_register_on_entry (&sr);
9677 /* Step 1: round SIZE to the previous multiple of the interval. */
9679 rounded_size = size & -PROBE_INTERVAL;
9682 /* Step 2: compute initial and final value of the loop counter. */
9684 /* TEST_OFFSET = FIRST. */
9685 emit_move_insn (sr.reg, GEN_INT (-first));
9687 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9688 last = first + rounded_size;
9691 /* Step 3: the loop
9693 while (TEST_ADDR != LAST_ADDR)
9694 {
9695 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9696 probe at TEST_ADDR
9697 }
9699 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9700 until it is equal to ROUNDED_SIZE.  */
9702 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9705 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9706 that SIZE is equal to ROUNDED_SIZE. */
9708 if (size != rounded_size)
9709 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9712 rounded_size - size));
9714 release_scratch_register_on_entry (&sr);
9717 /* Make sure nothing is scheduled before we are done. */
9718 emit_insn (gen_blockage ());
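/* Worked example (added; assumes PROBE_INTERVAL == 4096): a call with
   first == 8192 and size == 12288 takes the constant branch above and
   emits probes at sp-12288, sp-16384 and sp-20480 without moving the
   stack pointer.  */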
9721 /* Probe a range of stack addresses from REG to END, inclusive. These are
9722 offsets from the current stack pointer. */
9725 output_probe_stack_range (rtx reg, rtx end)
9727 static int labelno = 0;
9728 char loop_lab[32], end_lab[32];
9731 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9732 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9734 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9736 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9739 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9740 fputs ("\tje\t", asm_out_file);
9741 assemble_name_raw (asm_out_file, end_lab);
9742 fputc ('\n', asm_out_file);
9744 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9745 xops[1] = GEN_INT (PROBE_INTERVAL);
9746 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9748 /* Probe at TEST_ADDR. */
9749 xops[0] = stack_pointer_rtx;
9751 xops[2] = const0_rtx;
9752 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9754 fprintf (asm_out_file, "\tjmp\t");
9755 assemble_name_raw (asm_out_file, loop_lab);
9756 fputc ('\n', asm_out_file);
9758 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9763 /* Finalize the stack_realign_needed flag, which guides the prologue and
9764 epilogue to be generated in the correct form.  */
9766 ix86_finalize_stack_realign_flags (void)
9768 /* Check if stack realignment is really needed after reload, and
9769 store the result in cfun.  */
9770 unsigned int incoming_stack_boundary
9771 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9772 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9773 unsigned int stack_realign = (incoming_stack_boundary
9774 < (current_function_is_leaf
9775 ? crtl->max_used_stack_slot_alignment
9776 : crtl->stack_alignment_needed));
9778 if (crtl->stack_realign_finalized)
9780 /* After stack_realign_needed is finalized, we can no longer
9781 change it.  */
9782 gcc_assert (crtl->stack_realign_needed == stack_realign);
9786 crtl->stack_realign_needed = stack_realign;
9787 crtl->stack_realign_finalized = true;
9791 /* Expand the prologue into a bunch of separate insns. */
9794 ix86_expand_prologue (void)
9796 struct machine_function *m = cfun->machine;
9799 struct ix86_frame frame;
9800 HOST_WIDE_INT allocate;
9801 bool int_registers_saved;
9803 ix86_finalize_stack_realign_flags ();
9805 /* DRAP should not coexist with stack_realign_fp.  */
9806 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9808 memset (&m->fs, 0, sizeof (m->fs));
9810 /* Initialize CFA state for before the prologue. */
9811 m->fs.cfa_reg = stack_pointer_rtx;
9812 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9814 /* Track SP offset to the CFA. We continue tracking this after we've
9815 swapped the CFA register away from SP. In the case of re-alignment
9816 this is fudged; we're interested in offsets within the local frame.  */
9817 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9818 m->fs.sp_valid = true;
9820 ix86_compute_frame_layout (&frame);
9822 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9824 /* We should have already generated an error for any use of
9825 ms_hook on a nested function. */
9826 gcc_checking_assert (!ix86_static_chain_on_stack);
9828 /* Check if profiling is active and we shall use the profiling-before-
9829 prologue variant.  If so, report "sorry".  */
9830 if (crtl->profile && flag_fentry != 0)
9831 sorry ("ms_hook_prologue attribute isn%'t compatible "
9832 "with -mfentry for 32-bit");
9834 /* In ix86_asm_output_function_label we emitted:
9835 8b ff movl.s %edi,%edi
9836 55    push   %ebp
9837 8b ec movl.s %esp,%ebp
9839 This matches the hookable function prologue in Win32 API
9840 functions in Microsoft Windows XP Service Pack 2 and newer.
9841 Wine uses this to enable Windows apps to hook the Win32 API
9842 functions provided by Wine.
9844 What that means is that we've already set up the frame pointer. */
9846 if (frame_pointer_needed
9847 && !(crtl->drap_reg && crtl->stack_realign_needed))
9851 /* We've decided to use the frame pointer already set up.
9852 Describe this to the unwinder by pretending that both
9853 push and mov insns happen right here.
9855 Putting the unwind info here at the end of the ms_hook
9856 is done so that we can make absolutely certain we get
9857 the required byte sequence at the start of the function,
9858 rather than relying on an assembler that can produce
9859 the exact encoding required.
9861 However it does mean (in the unpatched case) that we have
9862 a 1 insn window where the asynchronous unwind info is
9863 incorrect. However, if we placed the unwind info at
9864 its correct location we would have incorrect unwind info
9865 in the patched case. Which is probably all moot since
9866 I don't expect Wine to generate dwarf2 unwind info for the
9867 system libraries that use this feature. */
9869 insn = emit_insn (gen_blockage ());
9871 push = gen_push (hard_frame_pointer_rtx);
9872 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9874 RTX_FRAME_RELATED_P (push) = 1;
9875 RTX_FRAME_RELATED_P (mov) = 1;
9877 RTX_FRAME_RELATED_P (insn) = 1;
9878 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9879 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9881 /* Note that gen_push incremented m->fs.cfa_offset, even
9882 though we didn't emit the push insn here. */
9883 m->fs.cfa_reg = hard_frame_pointer_rtx;
9884 m->fs.fp_offset = m->fs.cfa_offset;
9885 m->fs.fp_valid = true;
9889 /* The frame pointer is not needed so pop %ebp again.
9890 This leaves us with a pristine state. */
9891 emit_insn (gen_pop (hard_frame_pointer_rtx));
9895 /* The first insn of a function that accepts its static chain on the
9896 stack is to push the register that would be filled in by a direct
9897 call. This insn will be skipped by the trampoline. */
9898 else if (ix86_static_chain_on_stack)
9900 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9901 emit_insn (gen_blockage ());
9903 /* We don't want to interpret this push insn as a register save,
9904 only as a stack adjustment. The real copy of the register as
9905 a save will be done later, if needed. */
9906 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9907 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9908 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9909 RTX_FRAME_RELATED_P (insn) = 1;
9912 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9913 DRAP is needed and stack realignment is really needed after reload.  */
9914 if (stack_realign_drap)
9916 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9918 /* Only need to push parameter pointer reg if it is caller saved. */
9919 if (!call_used_regs[REGNO (crtl->drap_reg)])
9921 /* Push arg pointer reg */
9922 insn = emit_insn (gen_push (crtl->drap_reg));
9923 RTX_FRAME_RELATED_P (insn) = 1;
9926 /* Grab the argument pointer. */
9927 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9928 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9929 RTX_FRAME_RELATED_P (insn) = 1;
9930 m->fs.cfa_reg = crtl->drap_reg;
9931 m->fs.cfa_offset = 0;
9933 /* Align the stack. */
9934 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9936 GEN_INT (-align_bytes)));
9937 RTX_FRAME_RELATED_P (insn) = 1;
9939 /* Replicate the return address on the stack so that the return
9940 address can be reached via the (argp - 1) slot.  This is needed
9941 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9942 expand_builtin_return_addr, etc.  */
9943 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9944 t = gen_frame_mem (Pmode, t);
9945 insn = emit_insn (gen_push (t));
9946 RTX_FRAME_RELATED_P (insn) = 1;
9948 /* For the purposes of frame and register save area addressing,
9949 we've started over with a new frame. */
9950 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9951 m->fs.realigned = true;
9954 if (frame_pointer_needed && !m->fs.fp_valid)
9956 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9957 slower on all targets. Also sdb doesn't like it. */
9958 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9959 RTX_FRAME_RELATED_P (insn) = 1;
9961 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9963 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9964 RTX_FRAME_RELATED_P (insn) = 1;
9966 if (m->fs.cfa_reg == stack_pointer_rtx)
9967 m->fs.cfa_reg = hard_frame_pointer_rtx;
9968 m->fs.fp_offset = m->fs.sp_offset;
9969 m->fs.fp_valid = true;
9973 int_registers_saved = (frame.nregs == 0);
9975 if (!int_registers_saved)
9977 /* If saving registers via PUSH, do so now. */
9978 if (!frame.save_regs_using_mov)
9980 ix86_emit_save_regs ();
9981 int_registers_saved = true;
9982 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9985 /* When using the red zone we may start saving registers before allocating
9986 the stack frame, saving one cycle of the prologue.  However, avoid
9987 doing this if we have to probe the stack; at least on x86_64 the
9988 stack probe can turn into a call that clobbers a red zone location. */
9989 else if (ix86_using_red_zone ()
9990 && (! TARGET_STACK_PROBE
9991 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9993 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9994 int_registers_saved = true;
9998 if (stack_realign_fp)
10000 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10001 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10003 /* The computation of the size of the re-aligned stack frame means
10004 that we must allocate the size of the register save area before
10005 performing the actual alignment. Otherwise we cannot guarantee
10006 that there's enough storage above the realignment point. */
10007 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10008 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10009 GEN_INT (m->fs.sp_offset
10010 - frame.sse_reg_save_offset),
10013 /* Align the stack. */
10014 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10016 GEN_INT (-align_bytes)));
10018 /* For the purposes of register save area addressing, the stack
10019 pointer is no longer valid. As for the value of sp_offset,
10020 see ix86_compute_frame_layout, which we need to match in order
10021 to pass verification of stack_pointer_offset at the end. */
10022 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10023 m->fs.sp_valid = false;
10026 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10028 if (flag_stack_usage_info)
10030 /* We start to count from ARG_POINTER. */
10031 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10033 /* If it was realigned, take into account the fake frame. */
10034 if (stack_realign_drap)
10036 if (ix86_static_chain_on_stack)
10037 stack_size += UNITS_PER_WORD;
10039 if (!call_used_regs[REGNO (crtl->drap_reg)])
10040 stack_size += UNITS_PER_WORD;
10042 /* This over-estimates by 1 minimal-stack-alignment-unit but
10043 mitigates that by counting in the new return address slot. */
10044 current_function_dynamic_stack_size
10045 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10048 current_function_static_stack_size = stack_size;
10051 /* The stack has already been decremented by the instruction calling us
10052 so probe if the size is non-negative to preserve the protection area. */
10053 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10055 /* We expect the registers to be saved when probes are used. */
10056 gcc_assert (int_registers_saved);
10058 if (STACK_CHECK_MOVING_SP)
10060 ix86_adjust_stack_and_probe (allocate);
10065 HOST_WIDE_INT size = allocate;
10067 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10068 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10070 if (TARGET_STACK_PROBE)
10071 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10073 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10079 else if (!ix86_target_stack_probe ()
10080 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10082 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10083 GEN_INT (-allocate), -1,
10084 m->fs.cfa_reg == stack_pointer_rtx);
10088 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10090 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10092 bool eax_live = false;
10093 bool r10_live = false;
10096 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10097 if (!TARGET_64BIT_MS_ABI)
10098 eax_live = ix86_eax_live_at_start_p ();
10102 emit_insn (gen_push (eax));
10103 allocate -= UNITS_PER_WORD;
10107 r10 = gen_rtx_REG (Pmode, R10_REG);
10108 emit_insn (gen_push (r10));
10109 allocate -= UNITS_PER_WORD;
10112 emit_move_insn (eax, GEN_INT (allocate));
10113 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10115 /* Use the fact that AX still contains ALLOCATE. */
10116 adjust_stack_insn = (TARGET_64BIT
10117 ? gen_pro_epilogue_adjust_stack_di_sub
10118 : gen_pro_epilogue_adjust_stack_si_sub);
10120 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10121 stack_pointer_rtx, eax));
10123 /* Note that SEH directives need to continue tracking the stack
10124 pointer even after the frame pointer has been set up. */
10125 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10127 if (m->fs.cfa_reg == stack_pointer_rtx)
10128 m->fs.cfa_offset += allocate;
10130 RTX_FRAME_RELATED_P (insn) = 1;
10131 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10132 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10133 plus_constant (stack_pointer_rtx,
10136 m->fs.sp_offset += allocate;
10138 if (r10_live && eax_live)
10140 t = choose_baseaddr (m->fs.sp_offset - allocate);
10141 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10142 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10143 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10145 else if (eax_live || r10_live)
10147 t = choose_baseaddr (m->fs.sp_offset - allocate);
10148 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10151 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
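  /* For illustration, the large-allocation path above expands to roughly
     the following sequence (assuming eax is live at function start and
     naming an MS-style probe helper for concreteness; the real callee
     comes from ix86_gen_allocate_stack_worker):

	pushl	%eax			# preserve live eax
	movl	$allocate, %eax
	call	___chkstk_ms		# probe the new pages
	subl	%eax, %esp		# do the actual allocation
	movl	N(%esp), %eax		# reload eax from the frame

     with %r10 handled the same way in 64-bit code when it carries the
     static chain.  */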
10153   /* If we haven't already set up the frame pointer, do so now.  */
10154 if (frame_pointer_needed && !m->fs.fp_valid)
10156 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10157 GEN_INT (frame.stack_pointer_offset
10158 - frame.hard_frame_pointer_offset));
10159 insn = emit_insn (insn);
10160 RTX_FRAME_RELATED_P (insn) = 1;
10161 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10163 if (m->fs.cfa_reg == stack_pointer_rtx)
10164 m->fs.cfa_reg = hard_frame_pointer_rtx;
10165 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10166 m->fs.fp_valid = true;
10169 if (!int_registers_saved)
10170 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10171 if (frame.nsseregs)
10172 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10174 pic_reg_used = false;
10175 if (pic_offset_table_rtx
10176 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10179 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10181 if (alt_pic_reg_used != INVALID_REGNUM)
10182 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10184 pic_reg_used = true;
10191 if (ix86_cmodel == CM_LARGE_PIC)
10193 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10194 rtx label = gen_label_rtx ();
10195 emit_label (label);
10196 LABEL_PRESERVE_P (label) = 1;
10197 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10198 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10199 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10200 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10201 pic_offset_table_rtx, tmp_reg));
10204 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10208 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10209 RTX_FRAME_RELATED_P (insn) = 1;
10210 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10214   /* In the pic_reg_used case, make sure that the got load isn't deleted
10215      when mcount needs it.  A blockage to avoid call movement across the
10216      mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10218 if (crtl->profile && !flag_fentry && pic_reg_used)
10219 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10221 if (crtl->drap_reg && !crtl->stack_realign_needed)
10223      /* vDRAP is set up, but after reload it turns out stack realignment
10224         isn't necessary; here we emit the prologue to set up DRAP
10225         without the stack realignment adjustment.  */
10226 t = choose_baseaddr (0);
10227 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10230   /* Prevent instructions from being scheduled into the register save push
10231      sequence when access to the red-zone area is done through the frame
10232      pointer.  The offset between the frame pointer and the stack pointer
10233      is calculated relative to the value of the stack pointer at the end
10234      of the function prologue, and moving instructions that access the
10235      red zone via the frame pointer into the push sequence violates this assumption.  */
10236 if (frame_pointer_needed && frame.red_zone_size)
10237 emit_insn (gen_memory_blockage ());
10239 /* Emit cld instruction if stringops are used in the function. */
10240 if (TARGET_CLD && ix86_current_function_needs_cld)
10241 emit_insn (gen_cld ());
10243 /* SEH requires that the prologue end within 256 bytes of the start of
10244 the function. Prevent instruction schedules that would extend that.
10245 Further, prevent alloca modifications to the stack pointer from being
10246 combined with prologue modifications. */
10248 emit_insn (gen_prologue_use (stack_pointer_rtx));
10251 /* Emit code to restore REG using a POP insn. */
10254 ix86_emit_restore_reg_using_pop (rtx reg)
10256 struct machine_function *m = cfun->machine;
10257 rtx insn = emit_insn (gen_pop (reg));
10259 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10260 m->fs.sp_offset -= UNITS_PER_WORD;
10262 if (m->fs.cfa_reg == crtl->drap_reg
10263 && REGNO (reg) == REGNO (crtl->drap_reg))
10265 /* Previously we'd represented the CFA as an expression
10266 like *(%ebp - 8). We've just popped that value from
10267 the stack, which means we need to reset the CFA to
10268 the drap register. This will remain until we restore
10269 the stack pointer. */
10270 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10271 RTX_FRAME_RELATED_P (insn) = 1;
10273 /* This means that the DRAP register is valid for addressing too. */
10274 m->fs.drap_valid = true;
10278 if (m->fs.cfa_reg == stack_pointer_rtx)
10280 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10281 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10282 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10283 RTX_FRAME_RELATED_P (insn) = 1;
10285 m->fs.cfa_offset -= UNITS_PER_WORD;
10288 /* When the frame pointer is the CFA, and we pop it, we are
10289 swapping back to the stack pointer as the CFA. This happens
10290 for stack frames that don't allocate other data, so we assume
10291 the stack pointer is now pointing at the return address, i.e.
10292 the function entry state, which makes the offset be 1 word. */
10293 if (reg == hard_frame_pointer_rtx)
10295 m->fs.fp_valid = false;
10296 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10298 m->fs.cfa_reg = stack_pointer_rtx;
10299 m->fs.cfa_offset -= UNITS_PER_WORD;
10301 add_reg_note (insn, REG_CFA_DEF_CFA,
10302 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10303 GEN_INT (m->fs.cfa_offset)));
10304 RTX_FRAME_RELATED_P (insn) = 1;
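  /* Concretely, a sketch: popping the frame pointer in an epilogue,

	popl	%ebp

     both invalidates fs.fp_valid and, when %ebp was the CFA register,
     re-anchors the CFA at %esp + cfa_offset through the REG_CFA_DEF_CFA
     note attached above, so the unwinder keeps a valid CFA across the
     pop.  */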
10309 /* Emit code to restore saved registers using POP insns. */
10312 ix86_emit_restore_regs_using_pop (void)
10314 unsigned int regno;
10316 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10317 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10318 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10321 /* Emit code and notes for the LEAVE instruction. */
10324 ix86_emit_leave (void)
10326 struct machine_function *m = cfun->machine;
10327 rtx insn = emit_insn (ix86_gen_leave ());
10329 ix86_add_queued_cfa_restore_notes (insn);
10331 gcc_assert (m->fs.fp_valid);
10332 m->fs.sp_valid = true;
10333 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10334 m->fs.fp_valid = false;
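  /* leave is equivalent to the two-insn sequence

	movl	%ebp, %esp
	popl	%ebp

     which is why the stack pointer becomes valid again exactly one word
     above fp_offset: the pop advances %esp past the saved %ebp slot.  */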
10336 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10338 m->fs.cfa_reg = stack_pointer_rtx;
10339 m->fs.cfa_offset = m->fs.sp_offset;
10341 add_reg_note (insn, REG_CFA_DEF_CFA,
10342 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10343 RTX_FRAME_RELATED_P (insn) = 1;
10344 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10349 /* Emit code to restore saved registers using MOV insns.
10350 First register is restored from CFA - CFA_OFFSET. */
10352 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10353 bool maybe_eh_return)
10355 struct machine_function *m = cfun->machine;
10356 unsigned int regno;
10358 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10359 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10361 rtx reg = gen_rtx_REG (Pmode, regno);
10364 mem = choose_baseaddr (cfa_offset);
10365 mem = gen_frame_mem (Pmode, mem);
10366 insn = emit_move_insn (reg, mem);
10368 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10370 /* Previously we'd represented the CFA as an expression
10371 like *(%ebp - 8). We've just popped that value from
10372 the stack, which means we need to reset the CFA to
10373 the drap register. This will remain until we restore
10374 the stack pointer. */
10375 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10376 RTX_FRAME_RELATED_P (insn) = 1;
10378 /* This means that the DRAP register is valid for addressing. */
10379 m->fs.drap_valid = true;
10382 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10384 cfa_offset -= UNITS_PER_WORD;
10388 /* Emit code to restore saved registers using MOV insns.
10389 First register is restored from CFA - CFA_OFFSET. */
10391 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10392 bool maybe_eh_return)
10394 unsigned int regno;
10396 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10397 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10399 rtx reg = gen_rtx_REG (V4SFmode, regno);
10402 mem = choose_baseaddr (cfa_offset);
10403 mem = gen_rtx_MEM (V4SFmode, mem);
10404 set_mem_align (mem, 128);
10405 emit_move_insn (reg, mem);
10407 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10413 /* Restore function stack, frame, and registers. */
10416 ix86_expand_epilogue (int style)
10418 struct machine_function *m = cfun->machine;
10419 struct machine_frame_state frame_state_save = m->fs;
10420 struct ix86_frame frame;
10421 bool restore_regs_via_mov;
10424 ix86_finalize_stack_realign_flags ();
10425 ix86_compute_frame_layout (&frame);
10427 m->fs.sp_valid = (!frame_pointer_needed
10428 || (current_function_sp_is_unchanging
10429 && !stack_realign_fp));
10430 gcc_assert (!m->fs.sp_valid
10431 || m->fs.sp_offset == frame.stack_pointer_offset);
10433 /* The FP must be valid if the frame pointer is present. */
10434 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10435 gcc_assert (!m->fs.fp_valid
10436 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10438 /* We must have *some* valid pointer to the stack frame. */
10439 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10441 /* The DRAP is never valid at this point. */
10442 gcc_assert (!m->fs.drap_valid);
10444 /* See the comment about red zone and frame
10445 pointer usage in ix86_expand_prologue. */
10446 if (frame_pointer_needed && frame.red_zone_size)
10447 emit_insn (gen_memory_blockage ());
10449 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10450 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10452 /* Determine the CFA offset of the end of the red-zone. */
10453 m->fs.red_zone_offset = 0;
10454 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10456 /* The red-zone begins below the return address. */
10457 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10459 /* When the register save area is in the aligned portion of
10460 the stack, determine the maximum runtime displacement that
10461 matches up with the aligned frame. */
10462 if (stack_realign_drap)
10463 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10467 /* Special care must be taken for the normal return case of a function
10468 using eh_return: the eax and edx registers are marked as saved, but
10469 not restored along this path. Adjust the save location to match. */
10470 if (crtl->calls_eh_return && style != 2)
10471 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10473 /* EH_RETURN requires the use of moves to function properly. */
10474 if (crtl->calls_eh_return)
10475 restore_regs_via_mov = true;
10476 /* SEH requires the use of pops to identify the epilogue. */
10477 else if (TARGET_SEH)
10478 restore_regs_via_mov = false;
10479   /* If we're only restoring one register and sp is not valid, then
10480      use a move instruction to restore the register, since it's
10481      less work than reloading sp and popping the register.  */
10482 else if (!m->fs.sp_valid && frame.nregs <= 1)
10483 restore_regs_via_mov = true;
10484 else if (TARGET_EPILOGUE_USING_MOVE
10485 && cfun->machine->use_fast_prologue_epilogue
10486 && (frame.nregs > 1
10487 || m->fs.sp_offset != frame.reg_save_offset))
10488 restore_regs_via_mov = true;
10489 else if (frame_pointer_needed
10491 && m->fs.sp_offset != frame.reg_save_offset)
10492 restore_regs_via_mov = true;
10493 else if (frame_pointer_needed
10494 && TARGET_USE_LEAVE
10495 && cfun->machine->use_fast_prologue_epilogue
10496 && frame.nregs == 1)
10497 restore_regs_via_mov = true;
10499 restore_regs_via_mov = false;
10501 if (restore_regs_via_mov || frame.nsseregs)
10503 /* Ensure that the entire register save area is addressable via
10504 the stack pointer, if we will restore via sp. */
10506 && m->fs.sp_offset > 0x7fffffff
10507 && !(m->fs.fp_valid || m->fs.drap_valid)
10508 && (frame.nsseregs + frame.nregs) != 0)
10510 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10511 GEN_INT (m->fs.sp_offset
10512 - frame.sse_reg_save_offset),
10514 m->fs.cfa_reg == stack_pointer_rtx);
10518 /* If there are any SSE registers to restore, then we have to do it
10519 via moves, since there's obviously no pop for SSE regs. */
10520 if (frame.nsseregs)
10521 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10524 if (restore_regs_via_mov)
10529 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10531 /* eh_return epilogues need %ecx added to the stack pointer. */
10534 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10536 /* Stack align doesn't work with eh_return. */
10537 gcc_assert (!stack_realign_drap);
10538       /* Neither do regparm nested functions.  */
10539 gcc_assert (!ix86_static_chain_on_stack);
10541 if (frame_pointer_needed)
10543 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10544 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10545 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10547 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10548 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10550 /* Note that we use SA as a temporary CFA, as the return
10551 address is at the proper place relative to it. We
10552 pretend this happens at the FP restore insn because
10553 prior to this insn the FP would be stored at the wrong
10554 offset relative to SA, and after this insn we have no
10555 other reasonable register to use for the CFA. We don't
10556 bother resetting the CFA to the SP for the duration of
10557 the return insn. */
10558 add_reg_note (insn, REG_CFA_DEF_CFA,
10559 plus_constant (sa, UNITS_PER_WORD));
10560 ix86_add_queued_cfa_restore_notes (insn);
10561 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10562 RTX_FRAME_RELATED_P (insn) = 1;
10564 m->fs.cfa_reg = sa;
10565 m->fs.cfa_offset = UNITS_PER_WORD;
10566 m->fs.fp_valid = false;
10568 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10569 const0_rtx, style, false);
10573 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10574 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10575 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10576 ix86_add_queued_cfa_restore_notes (insn);
10578 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10579 if (m->fs.cfa_offset != UNITS_PER_WORD)
10581 m->fs.cfa_offset = UNITS_PER_WORD;
10582 add_reg_note (insn, REG_CFA_DEF_CFA,
10583 plus_constant (stack_pointer_rtx,
10585 RTX_FRAME_RELATED_P (insn) = 1;
10588 m->fs.sp_offset = UNITS_PER_WORD;
10589 m->fs.sp_valid = true;
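      /* For illustration: in the no-frame-pointer case above, with a
	 32-bit frame and sp_offset == 8, the adjustment folds into a
	 single lea off the stack-adjust register %ecx
	 (EH_RETURN_STACKADJ_RTX):

	    leal	4(%esp,%ecx), %esp

	 i.e. skip the return-address slot plus the runtime adjustment.  */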
10594 /* SEH requires that the function end with (1) a stack adjustment
10595 if necessary, (2) a sequence of pops, and (3) a return or
10596 jump instruction. Prevent insns from the function body from
10597 being scheduled into this sequence. */
10600       /* Prevent a catch region from being adjacent to the standard
10601          epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
10602          several other flags that would be interesting to test are
10603          set up yet.  */
10604 if (flag_non_call_exceptions)
10605 emit_insn (gen_nops (const1_rtx));
10607 emit_insn (gen_blockage ());
10610 /* First step is to deallocate the stack frame so that we can
10611 pop the registers. */
10612 if (!m->fs.sp_valid)
10614 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10615 GEN_INT (m->fs.fp_offset
10616 - frame.reg_save_offset),
10619 else if (m->fs.sp_offset != frame.reg_save_offset)
10621 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10622 GEN_INT (m->fs.sp_offset
10623 - frame.reg_save_offset),
10625 m->fs.cfa_reg == stack_pointer_rtx);
10628 ix86_emit_restore_regs_using_pop ();
10631   /* If we used a frame pointer and haven't already got rid of it,
10632      then do so now.  */
10633 if (m->fs.fp_valid)
10635 /* If the stack pointer is valid and pointing at the frame
10636 pointer store address, then we only need a pop. */
10637 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10638 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10639 /* Leave results in shorter dependency chains on CPUs that are
10640 able to grok it fast. */
10641 else if (TARGET_USE_LEAVE
10642 || optimize_function_for_size_p (cfun)
10643 || !cfun->machine->use_fast_prologue_epilogue)
10644 ix86_emit_leave ();
10647 pro_epilogue_adjust_stack (stack_pointer_rtx,
10648 hard_frame_pointer_rtx,
10649 const0_rtx, style, !using_drap);
10650 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10656 int param_ptr_offset = UNITS_PER_WORD;
10659 gcc_assert (stack_realign_drap);
10661 if (ix86_static_chain_on_stack)
10662 param_ptr_offset += UNITS_PER_WORD;
10663 if (!call_used_regs[REGNO (crtl->drap_reg)])
10664 param_ptr_offset += UNITS_PER_WORD;
10666 insn = emit_insn (gen_rtx_SET
10667 (VOIDmode, stack_pointer_rtx,
10668 gen_rtx_PLUS (Pmode,
10670 GEN_INT (-param_ptr_offset))));
10671 m->fs.cfa_reg = stack_pointer_rtx;
10672 m->fs.cfa_offset = param_ptr_offset;
10673 m->fs.sp_offset = param_ptr_offset;
10674 m->fs.realigned = false;
10676 add_reg_note (insn, REG_CFA_DEF_CFA,
10677 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10678 GEN_INT (param_ptr_offset)));
10679 RTX_FRAME_RELATED_P (insn) = 1;
10681 if (!call_used_regs[REGNO (crtl->drap_reg)])
10682 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10685 /* At this point the stack pointer must be valid, and we must have
10686 restored all of the registers. We may not have deallocated the
10687 entire stack frame. We've delayed this until now because it may
10688 be possible to merge the local stack deallocation with the
10689 deallocation forced by ix86_static_chain_on_stack. */
10690 gcc_assert (m->fs.sp_valid);
10691 gcc_assert (!m->fs.fp_valid);
10692 gcc_assert (!m->fs.realigned);
10693 if (m->fs.sp_offset != UNITS_PER_WORD)
10695 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10696 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10700 /* Sibcall epilogues don't want a return instruction. */
10703 m->fs = frame_state_save;
10707 /* Emit vzeroupper if needed. */
10708 if (TARGET_VZEROUPPER
10709 && !TREE_THIS_VOLATILE (cfun->decl)
10710 && !cfun->machine->caller_return_avx256_p)
10711 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10713 if (crtl->args.pops_args && crtl->args.size)
10715 rtx popc = GEN_INT (crtl->args.pops_args);
10717 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10718 address, do explicit add, and jump indirectly to the caller. */
10720 if (crtl->args.pops_args >= 65536)
10722 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10725 /* There is no "pascal" calling convention in any 64bit ABI. */
10726 gcc_assert (!TARGET_64BIT);
10728 insn = emit_insn (gen_pop (ecx));
10729 m->fs.cfa_offset -= UNITS_PER_WORD;
10730 m->fs.sp_offset -= UNITS_PER_WORD;
10732 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10733 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10734 add_reg_note (insn, REG_CFA_REGISTER,
10735 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10736 RTX_FRAME_RELATED_P (insn) = 1;
10738 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10740 emit_jump_insn (gen_return_indirect_internal (ecx));
10743 emit_jump_insn (gen_return_pop_internal (popc));
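      /* The two shapes this takes, sketched:

	    ret	$20			# pops_args < 64K

	 versus, for 64K or more bytes of popped arguments,

	    popl	%ecx		# return address
	    addl	$pops_args, %esp	# pop the arguments explicitly
	    jmp	*%ecx			# return to the caller

	 the latter matching the pop/add/indirect-jump insns emitted in the
	 branch above.  */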
10746 emit_jump_insn (gen_return_internal ());
10748 /* Restore the state back to the state from the prologue,
10749 so that it's correct for the next epilogue. */
10750 m->fs = frame_state_save;
10753 /* Reset from the function's potential modifications. */
10756 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10757 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10759 if (pic_offset_table_rtx)
10760 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10762 /* Mach-O doesn't support labels at the end of objects, so if
10763 it looks like we might want one, insert a NOP. */
10765 rtx insn = get_last_insn ();
10768 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10769 insn = PREV_INSN (insn);
10773 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10774 fputs ("\tnop\n", file);
10780 /* Return a scratch register to use in the split stack prologue. The
10781    split stack prologue is used for -fsplit-stack.  It contains the first
10782    instructions in the function, even before the regular prologue.
10783 The scratch register can be any caller-saved register which is not
10784 used for parameters or for the static chain. */
10786 static unsigned int
10787 split_stack_prologue_scratch_regno (void)
10796 is_fastcall = (lookup_attribute ("fastcall",
10797 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10799 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10803 if (DECL_STATIC_CHAIN (cfun->decl))
10805 sorry ("-fsplit-stack does not support fastcall with "
10806 "nested function");
10807 return INVALID_REGNUM;
10811 else if (regparm < 3)
10813 if (!DECL_STATIC_CHAIN (cfun->decl))
10819 sorry ("-fsplit-stack does not support 2 register "
10820 " parameters for a nested function");
10821 return INVALID_REGNUM;
10828 /* FIXME: We could make this work by pushing a register
10829 around the addition and comparison. */
10830 sorry ("-fsplit-stack does not support 3 register parameters");
10831 return INVALID_REGNUM;
10836 /* A SYMBOL_REF for the function which allocates new stack space for
10839 static GTY(()) rtx split_stack_fn;
10841 /* A SYMBOL_REF for the more stack function when using the large
10844 static GTY(()) rtx split_stack_fn_large;
10846 /* Handle -fsplit-stack. These are the first instructions in the
10847 function, even before the regular prologue. */
10850 ix86_expand_split_stack_prologue (void)
10852 struct ix86_frame frame;
10853 HOST_WIDE_INT allocate;
10854 unsigned HOST_WIDE_INT args_size;
10855 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10856 rtx scratch_reg = NULL_RTX;
10857 rtx varargs_label = NULL_RTX;
10860 gcc_assert (flag_split_stack && reload_completed);
10862 ix86_finalize_stack_realign_flags ();
10863 ix86_compute_frame_layout (&frame);
10864 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10866 /* This is the label we will branch to if we have enough stack
10867 space. We expect the basic block reordering pass to reverse this
10868 branch if optimizing, so that we branch in the unlikely case. */
10869 label = gen_label_rtx ();
10871 /* We need to compare the stack pointer minus the frame size with
10872 the stack boundary in the TCB. The stack boundary always gives
10873 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10874 can compare directly. Otherwise we need to do an addition. */
10876 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10877 UNSPEC_STACK_CHECK);
10878 limit = gen_rtx_CONST (Pmode, limit);
10879 limit = gen_rtx_MEM (Pmode, limit);
10880 if (allocate < SPLIT_STACK_AVAILABLE)
10881 current = stack_pointer_rtx;
10884 unsigned int scratch_regno;
10887 /* We need a scratch register to hold the stack pointer minus
10888 the required frame size. Since this is the very start of the
10889 function, the scratch register can be any caller-saved
10890 register which is not used for parameters. */
10891 offset = GEN_INT (- allocate);
10892 scratch_regno = split_stack_prologue_scratch_regno ();
10893 if (scratch_regno == INVALID_REGNUM)
10895 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10896 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10898 /* We don't use ix86_gen_add3 in this case because it will
10899 want to split to lea, but when not optimizing the insn
10900 will not be split after this point. */
10901 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10902 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10907 emit_move_insn (scratch_reg, offset);
10908 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10909 stack_pointer_rtx));
10911 current = scratch_reg;
10914 ix86_expand_branch (GEU, current, limit, label);
10915 jump_insn = get_last_insn ();
10916 JUMP_LABEL (jump_insn) = label;
10918 /* Mark the jump as very likely to be taken. */
10919 add_reg_note (jump_insn, REG_BR_PROB,
10920 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
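  /* A sketch of the resulting check, for a frame too large to compare
     %rsp against the TCB limit directly (the register choice and the
     0x70 TCB offset are purely illustrative; the limit address is the
     target-specific UNSPEC_STACK_CHECK reference built above):

	leaq	-FRAME(%rsp), %r11
	cmpq	%fs:0x70, %r11
	jae	.Lhave_stack
	# fall through into the __morestack call sequence
     .Lhave_stack:  */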
10922 if (split_stack_fn == NULL_RTX)
10923 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10924 fn = split_stack_fn;
10926 /* Get more stack space. We pass in the desired stack space and the
10927 size of the arguments to copy to the new stack. In 32-bit mode
10928 we push the parameters; __morestack will return on a new stack
10929 anyhow. In 64-bit mode we pass the parameters in r10 and
10931 allocate_rtx = GEN_INT (allocate);
10932 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10933 call_fusage = NULL_RTX;
10938 reg10 = gen_rtx_REG (Pmode, R10_REG);
10939 reg11 = gen_rtx_REG (Pmode, R11_REG);
10941 /* If this function uses a static chain, it will be in %r10.
10942 Preserve it across the call to __morestack. */
10943 if (DECL_STATIC_CHAIN (cfun->decl))
10947 rax = gen_rtx_REG (Pmode, AX_REG);
10948 emit_move_insn (rax, reg10);
10949 use_reg (&call_fusage, rax);
10952 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10954 HOST_WIDE_INT argval;
10956 /* When using the large model we need to load the address
10957 into a register, and we've run out of registers. So we
10958 switch to a different calling convention, and we call a
10959          different function: __morestack_large_model.  We pass the
10960 argument size in the upper 32 bits of r10 and pass the
10961 frame size in the lower 32 bits. */
10962 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
10963 gcc_assert ((args_size & 0xffffffff) == args_size);
10965 if (split_stack_fn_large == NULL_RTX)
10966 split_stack_fn_large =
10967 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10969 if (ix86_cmodel == CM_LARGE_PIC)
10973 label = gen_label_rtx ();
10974 emit_label (label);
10975 LABEL_PRESERVE_P (label) = 1;
10976 emit_insn (gen_set_rip_rex64 (reg10, label));
10977 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10978 emit_insn (gen_adddi3 (reg10, reg10, reg11));
10979 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10981 x = gen_rtx_CONST (Pmode, x);
10982 emit_move_insn (reg11, x);
10983 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10984 x = gen_const_mem (Pmode, x);
10985 emit_move_insn (reg11, x);
10988 emit_move_insn (reg11, split_stack_fn_large);
10992 argval = ((args_size << 16) << 16) + allocate;
10993 emit_move_insn (reg10, GEN_INT (argval));
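	  /* Worked example of the packing above: with args_size == 24 and
	     allocate == 0x1000, argval is (24 << 32) + 0x1000 and the
	     single

		movabsq	$0x1800001000, %r10

	     hands __morestack_large_model both values at once.  */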
10997 emit_move_insn (reg10, allocate_rtx);
10998 emit_move_insn (reg11, GEN_INT (args_size));
10999 use_reg (&call_fusage, reg11);
11002 use_reg (&call_fusage, reg10);
11006 emit_insn (gen_push (GEN_INT (args_size)));
11007 emit_insn (gen_push (allocate_rtx));
11009 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11010 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11012 add_function_usage_to (call_insn, call_fusage);
11014 /* In order to make call/return prediction work right, we now need
11015 to execute a return instruction. See
11016 libgcc/config/i386/morestack.S for the details on how this works.
11018 For flow purposes gcc must not see this as a return
11019 instruction--we need control flow to continue at the subsequent
11020 label. Therefore, we use an unspec. */
11021 gcc_assert (crtl->args.pops_args < 65536);
11022 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11024 /* If we are in 64-bit mode and this function uses a static chain,
11025      we saved %r10 in %rax before calling __morestack.  */
11026 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11027 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11028 gen_rtx_REG (Pmode, AX_REG));
11030 /* If this function calls va_start, we need to store a pointer to
11031 the arguments on the old stack, because they may not have been
11032 all copied to the new stack. At this point the old stack can be
11033 found at the frame pointer value used by __morestack, because
11034 __morestack has set that up before calling back to us. Here we
11035 store that pointer in a scratch register, and in
11036 ix86_expand_prologue we store the scratch register in a stack
11038 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11040 unsigned int scratch_regno;
11044 scratch_regno = split_stack_prologue_scratch_regno ();
11045 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11046 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11048      /* 64-bit:
11049         fp -> old fp value
11050               return address within this function
11051               return address of caller of this function
11052               stack arguments
11053         So we add three words to get to the stack arguments.
11054
11055         32-bit:
11056         fp -> old fp value
11057               return address within this function
11058               first argument to __morestack
11059               second argument to __morestack
11060               return address of caller of this function
11061               stack arguments
11062         So we add five words to get to the stack arguments.  */
11064 words = TARGET_64BIT ? 3 : 5;
11065 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11066 gen_rtx_PLUS (Pmode, frame_reg,
11067 GEN_INT (words * UNITS_PER_WORD))));
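      /* So in 64-bit code the insn above is simply, for example,

	    leaq	24(%rbp), %scratch	# 3 words of 8 bytes

	 with leal 20(%ebp), %scratch (5 words of 4 bytes) as the 32-bit
	 analogue; %scratch stands for whatever register
	 split_stack_prologue_scratch_regno returned.  */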
11069 varargs_label = gen_label_rtx ();
11070 emit_jump_insn (gen_jump (varargs_label));
11071 JUMP_LABEL (get_last_insn ()) = varargs_label;
11076 emit_label (label);
11077 LABEL_NUSES (label) = 1;
11079 /* If this function calls va_start, we now have to set the scratch
11080 register for the case where we do not call __morestack. In this
11081 case we need to set it based on the stack pointer. */
11082 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11084 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11085 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11086 GEN_INT (UNITS_PER_WORD))));
11088 emit_label (varargs_label);
11089 LABEL_NUSES (varargs_label) = 1;
11093 /* We may have to tell the dataflow pass that the split stack prologue
11094 is initializing a scratch register. */
11097 ix86_live_on_entry (bitmap regs)
11099 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11101 gcc_assert (flag_split_stack);
11102 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11106 /* Determine if OP is a suitable SUBREG RTX for an address.  */
11109 ix86_address_subreg_operand (rtx op)
11111 enum machine_mode mode;
11116 mode = GET_MODE (op);
11118 if (GET_MODE_CLASS (mode) != MODE_INT)
11121 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11122 failures when the register is one word out of a two word structure. */
11123 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11126 /* Allow only SUBREGs of non-eliminable hard registers. */
11127 return register_no_elim_operand (op, mode);
11130 /* Extract the parts of an RTL expression that is a valid memory address
11131 for an instruction. Return 0 if the structure of the address is
11132 grossly off. Return -1 if the address contains ASHIFT, so it is not
11133    strictly valid, but is still used for computing the length of an lea instruction.  */
11136 ix86_decompose_address (rtx addr, struct ix86_address *out)
11138 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11139 rtx base_reg, index_reg;
11140 HOST_WIDE_INT scale = 1;
11141 rtx scale_rtx = NULL_RTX;
11144 enum ix86_address_seg seg = SEG_DEFAULT;
11146   /* Allow zero-extended SImode addresses;
11147      they will be emitted with the addr32 prefix.  */
11148 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11150 if (GET_CODE (addr) == ZERO_EXTEND
11151 && GET_MODE (XEXP (addr, 0)) == SImode)
11152 addr = XEXP (addr, 0);
11153 else if (GET_CODE (addr) == AND
11154 && const_32bit_mask (XEXP (addr, 1), DImode))
11156 addr = XEXP (addr, 0);
11158 /* Strip subreg. */
11159 if (GET_CODE (addr) == SUBREG
11160 && GET_MODE (SUBREG_REG (addr)) == SImode)
11161 addr = SUBREG_REG (addr);
11167 else if (GET_CODE (addr) == SUBREG)
11169 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11174 else if (GET_CODE (addr) == PLUS)
11176 rtx addends[4], op;
11184 addends[n++] = XEXP (op, 1);
11187 while (GET_CODE (op) == PLUS);
11192 for (i = n; i >= 0; --i)
11195 switch (GET_CODE (op))
11200 index = XEXP (op, 0);
11201 scale_rtx = XEXP (op, 1);
11207 index = XEXP (op, 0);
11208 tmp = XEXP (op, 1);
11209 if (!CONST_INT_P (tmp))
11211 scale = INTVAL (tmp);
11212 if ((unsigned HOST_WIDE_INT) scale > 3)
11214 scale = 1 << scale;
11218 if (XINT (op, 1) == UNSPEC_TP
11219 && TARGET_TLS_DIRECT_SEG_REFS
11220 && seg == SEG_DEFAULT)
11221 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11227 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11254 else if (GET_CODE (addr) == MULT)
11256 index = XEXP (addr, 0); /* index*scale */
11257 scale_rtx = XEXP (addr, 1);
11259 else if (GET_CODE (addr) == ASHIFT)
11261 /* We're called for lea too, which implements ashift on occasion. */
11262 index = XEXP (addr, 0);
11263 tmp = XEXP (addr, 1);
11264 if (!CONST_INT_P (tmp))
11266 scale = INTVAL (tmp);
11267 if ((unsigned HOST_WIDE_INT) scale > 3)
11269 scale = 1 << scale;
11273 disp = addr; /* displacement */
11279 else if (GET_CODE (index) == SUBREG
11280 && ix86_address_subreg_operand (SUBREG_REG (index)))
11286 /* Extract the integral value of scale. */
11289 if (!CONST_INT_P (scale_rtx))
11291 scale = INTVAL (scale_rtx);
11294 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11295 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11297 /* Avoid useless 0 displacement. */
11298 if (disp == const0_rtx && (base || index))
11301   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11302 if (base_reg && index_reg && scale == 1
11303 && (index_reg == arg_pointer_rtx
11304 || index_reg == frame_pointer_rtx
11305 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11308 tmp = base, base = index, index = tmp;
11309 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11312 /* Special case: %ebp cannot be encoded as a base without a displacement.
11316 && (base_reg == hard_frame_pointer_rtx
11317 || base_reg == frame_pointer_rtx
11318 || base_reg == arg_pointer_rtx
11319 || (REG_P (base_reg)
11320 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11321 || REGNO (base_reg) == R13_REG))))
11324   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
11325 Avoid this by transforming to [%esi+0].
11326 Reload calls address legitimization without cfun defined, so we need
11327 to test cfun for being non-NULL. */
11328 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11329 && base_reg && !index_reg && !disp
11330 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11333 /* Special case: encode reg+reg instead of reg*2. */
11334 if (!base && index && scale == 2)
11335 base = index, base_reg = index_reg, scale = 1;
11337 /* Special case: scaling cannot be encoded without base or displacement. */
11338 if (!base && !disp && index && scale != 1)
11342 out->index = index;
11344 out->scale = scale;
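  /* A worked example: the Pmode address

	(plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))

     decomposes into base = A, index = B, scale = 4, disp = 12, i.e. the
     operand that prints as 12(%rA,%rB,4) in AT&T syntax.  */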
11350 /* Return cost of the memory address x.
11351 For i386, it is better to use a complex address than let gcc copy
11352 the address into a reg and make a new pseudo. But not if the address
11353    requires two regs - that would mean more pseudos with longer
11356 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11358 struct ix86_address parts;
11360 int ok = ix86_decompose_address (x, &parts);
11364 if (parts.base && GET_CODE (parts.base) == SUBREG)
11365 parts.base = SUBREG_REG (parts.base);
11366 if (parts.index && GET_CODE (parts.index) == SUBREG)
11367 parts.index = SUBREG_REG (parts.index);
11369 /* Attempt to minimize number of registers in the address. */
11371 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11373 && (!REG_P (parts.index)
11374 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11378 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11380 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11381 && parts.base != parts.index)
11384   /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11385      since its predecode logic can't detect the length of instructions
11386      and such insns degenerate to vector decoding.  Increase the cost of
11387      such addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
11388      to split such addresses or even refuse such addresses at all.
11390      The following addressing modes are affected:
11391       [base+scale*index]
11392       [scale*index+disp]
11393       [base+index]
11395      The first and last case may be avoidable by explicitly coding the zero
11396      into the memory address, but I don't have an AMD-K6 machine handy to check this
11400 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11401 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11402 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11408 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11409    this is used to form addresses of local data when -fPIC is in
11413 darwin_local_data_pic (rtx disp)
11415 return (GET_CODE (disp) == UNSPEC
11416 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11419 /* Determine if a given RTX is a valid constant. We already know this
11420 satisfies CONSTANT_P. */
11423 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11425 switch (GET_CODE (x))
11430 if (GET_CODE (x) == PLUS)
11432 if (!CONST_INT_P (XEXP (x, 1)))
11437 if (TARGET_MACHO && darwin_local_data_pic (x))
11440 /* Only some unspecs are valid as "constants". */
11441 if (GET_CODE (x) == UNSPEC)
11442 switch (XINT (x, 1))
11445 case UNSPEC_GOTOFF:
11446 case UNSPEC_PLTOFF:
11447 return TARGET_64BIT;
11449 case UNSPEC_NTPOFF:
11450 x = XVECEXP (x, 0, 0);
11451 return (GET_CODE (x) == SYMBOL_REF
11452 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11453 case UNSPEC_DTPOFF:
11454 x = XVECEXP (x, 0, 0);
11455 return (GET_CODE (x) == SYMBOL_REF
11456 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11461 /* We must have drilled down to a symbol. */
11462 if (GET_CODE (x) == LABEL_REF)
11464 if (GET_CODE (x) != SYMBOL_REF)
11469 /* TLS symbols are never valid. */
11470 if (SYMBOL_REF_TLS_MODEL (x))
11473 /* DLLIMPORT symbols are never valid. */
11474 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11475 && SYMBOL_REF_DLLIMPORT_P (x))
11479 /* mdynamic-no-pic */
11480 if (MACHO_DYNAMIC_NO_PIC_P)
11481 return machopic_symbol_defined_p (x);
11486 if (GET_MODE (x) == TImode
11487 && x != CONST0_RTX (TImode)
11493 if (!standard_sse_constant_p (x))
11500 /* Otherwise we handle everything else in the move patterns. */
11504 /* Determine if it's legal to put X into the constant pool. This
11505 is not possible for the address of thread-local symbols, which
11506 is checked above. */
11509 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11511 /* We can always put integral constants and vectors in memory. */
11512 switch (GET_CODE (x))
11522 return !ix86_legitimate_constant_p (mode, x);
11526 /* Nonzero if the constant value X is a legitimate general operand
11527 when generating PIC code. It is given that flag_pic is on and
11528 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11531 legitimate_pic_operand_p (rtx x)
11535 switch (GET_CODE (x))
11538 inner = XEXP (x, 0);
11539 if (GET_CODE (inner) == PLUS
11540 && CONST_INT_P (XEXP (inner, 1)))
11541 inner = XEXP (inner, 0);
11543 /* Only some unspecs are valid as "constants". */
11544 if (GET_CODE (inner) == UNSPEC)
11545 switch (XINT (inner, 1))
11548 case UNSPEC_GOTOFF:
11549 case UNSPEC_PLTOFF:
11550 return TARGET_64BIT;
11552 x = XVECEXP (inner, 0, 0);
11553 return (GET_CODE (x) == SYMBOL_REF
11554 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11555 case UNSPEC_MACHOPIC_OFFSET:
11556 return legitimate_pic_address_disp_p (x);
11564 return legitimate_pic_address_disp_p (x);
11571 /* Determine if a given CONST RTX is a valid memory displacement
11575 legitimate_pic_address_disp_p (rtx disp)
11579 /* In 64bit mode we can allow direct addresses of symbols and labels
11580 when they are not dynamic symbols. */
11583 rtx op0 = disp, op1;
11585 switch (GET_CODE (disp))
11591 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11593 op0 = XEXP (XEXP (disp, 0), 0);
11594 op1 = XEXP (XEXP (disp, 0), 1);
11595 if (!CONST_INT_P (op1)
11596 || INTVAL (op1) >= 16*1024*1024
11597 || INTVAL (op1) < -16*1024*1024)
11599 if (GET_CODE (op0) == LABEL_REF)
11601 if (GET_CODE (op0) != SYMBOL_REF)
11606 /* TLS references should always be enclosed in UNSPEC. */
11607 if (SYMBOL_REF_TLS_MODEL (op0))
11609 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11610 && ix86_cmodel != CM_LARGE_PIC)
11618 if (GET_CODE (disp) != CONST)
11620 disp = XEXP (disp, 0);
11624   /* It is unsafe to allow PLUS expressions.  This limits the allowed
11625      distance of GOT table references.  We should not need these anyway.  */
11626 if (GET_CODE (disp) != UNSPEC
11627 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11628 && XINT (disp, 1) != UNSPEC_GOTOFF
11629 && XINT (disp, 1) != UNSPEC_PCREL
11630 && XINT (disp, 1) != UNSPEC_PLTOFF))
11633 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11634 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11640 if (GET_CODE (disp) == PLUS)
11642 if (!CONST_INT_P (XEXP (disp, 1)))
11644 disp = XEXP (disp, 0);
11648 if (TARGET_MACHO && darwin_local_data_pic (disp))
11651 if (GET_CODE (disp) != UNSPEC)
11654 switch (XINT (disp, 1))
11659 /* We need to check for both symbols and labels because VxWorks loads
11660 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11662 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11663 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11664 case UNSPEC_GOTOFF:
11665       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11666          While the ABI also specifies a 32bit relocation, we don't produce
11667          it in the small PIC model at all.  */
11668 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11669 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11671 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11673 case UNSPEC_GOTTPOFF:
11674 case UNSPEC_GOTNTPOFF:
11675 case UNSPEC_INDNTPOFF:
11678 disp = XVECEXP (disp, 0, 0);
11679 return (GET_CODE (disp) == SYMBOL_REF
11680 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11681 case UNSPEC_NTPOFF:
11682 disp = XVECEXP (disp, 0, 0);
11683 return (GET_CODE (disp) == SYMBOL_REF
11684 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11685 case UNSPEC_DTPOFF:
11686 disp = XVECEXP (disp, 0, 0);
11687 return (GET_CODE (disp) == SYMBOL_REF
11688 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11694 /* Recognizes RTL expressions that are valid memory addresses for an
11695 instruction. The MODE argument is the machine mode for the MEM
11696 expression that wants to use this address.
11698    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
11699 convert common non-canonical forms to canonical form so that they will
11703 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11704 rtx addr, bool strict)
11706 struct ix86_address parts;
11707 rtx base, index, disp;
11708 HOST_WIDE_INT scale;
11710 if (ix86_decompose_address (addr, &parts) <= 0)
11711 /* Decomposition failed. */
11715 index = parts.index;
11717 scale = parts.scale;
11719 /* Validate base register. */
11726 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11727 reg = SUBREG_REG (base);
11729 /* Base is not a register. */
11732 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11735 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11736 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11737 /* Base is not valid. */
11741 /* Validate index register. */
11748 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11749 reg = SUBREG_REG (index);
11751 /* Index is not a register. */
11754 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11757 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11758 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11759 /* Index is not valid. */
11763 /* Index and base should have the same mode. */
11765 && GET_MODE (base) != GET_MODE (index))
11768 /* Validate scale factor. */
11772 /* Scale without index. */
11775 if (scale != 2 && scale != 4 && scale != 8)
11776 /* Scale is not a valid multiplier. */
11780 /* Validate displacement. */
11783 if (GET_CODE (disp) == CONST
11784 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11785 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11786 switch (XINT (XEXP (disp, 0), 1))
11788       /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11789          used.  While the ABI also specifies 32bit relocations, we don't
11790          produce them at all and use IP relative addressing instead.  */
11792 case UNSPEC_GOTOFF:
11793 gcc_assert (flag_pic);
11795 goto is_legitimate_pic;
11797 /* 64bit address unspec. */
11800 case UNSPEC_GOTPCREL:
11802 gcc_assert (flag_pic);
11803 goto is_legitimate_pic;
11805 case UNSPEC_GOTTPOFF:
11806 case UNSPEC_GOTNTPOFF:
11807 case UNSPEC_INDNTPOFF:
11808 case UNSPEC_NTPOFF:
11809 case UNSPEC_DTPOFF:
11812 case UNSPEC_STACK_CHECK:
11813 gcc_assert (flag_split_stack);
11817 /* Invalid address unspec. */
11821 else if (SYMBOLIC_CONST (disp)
11825 && MACHOPIC_INDIRECT
11826 && !machopic_operand_p (disp)
11832 if (TARGET_64BIT && (index || base))
11834 /* foo@dtpoff(%rX) is ok. */
11835 if (GET_CODE (disp) != CONST
11836 || GET_CODE (XEXP (disp, 0)) != PLUS
11837 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11838 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11839 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11840 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11841 /* Non-constant pic memory reference. */
11844 else if ((!TARGET_MACHO || flag_pic)
11845 && ! legitimate_pic_address_disp_p (disp))
11846 /* Displacement is an invalid pic construct. */
11849 else if (MACHO_DYNAMIC_NO_PIC_P
11850 && !ix86_legitimate_constant_p (Pmode, disp))
11851     /* displacement must be referenced via a non_lazy_pointer */
11855 /* This code used to verify that a symbolic pic displacement
11856 includes the pic_offset_table_rtx register.
11858      While this is a good idea, unfortunately these constructs may
11859      be created by the "adds using lea" optimization for incorrect
11860      code like:
11861
11862      int a;
11863      int foo(int i)
11864        {
11865          return *(&a+i);
11866        }
11867
11868      This code is nonsensical, but results in addressing the
11869      GOT table with a pic_offset_table_rtx base.  We can't
11870      just refuse it easily, since it gets matched by the
11871      "addsi3" pattern, which later gets split to lea in case
11872      the output register differs from the input.  While this
11873      could be handled by a separate addsi pattern for this case
11874      that never results in lea, disabling this test seems to be
11875      the easier and correct fix for the crash.  */
11877 else if (GET_CODE (disp) != LABEL_REF
11878 && !CONST_INT_P (disp)
11879 && (GET_CODE (disp) != CONST
11880 || !ix86_legitimate_constant_p (Pmode, disp))
11881 && (GET_CODE (disp) != SYMBOL_REF
11882 || !ix86_legitimate_constant_p (Pmode, disp)))
11883 /* Displacement is not constant. */
11885 else if (TARGET_64BIT
11886 && !x86_64_immediate_operand (disp, VOIDmode))
11887 /* Displacement is out of range. */
11891 /* Everything looks valid. */
11895 /* Determine if a given RTX is a valid constant address. */
11898 constant_address_p (rtx x)
11900 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11903 /* Return a unique alias set for the GOT. */
11905 static alias_set_type
11906 ix86_GOT_alias_set (void)
11908 static alias_set_type set = -1;
11910 set = new_alias_set ();
11914 /* Return a legitimate reference for ORIG (an address) using the
11915 register REG. If REG is 0, a new pseudo is generated.
11917 There are two types of references that must be handled:
11919 1. Global data references must load the address from the GOT, via
11920 the PIC reg. An insn is emitted to do this load, and the reg is
11923 2. Static data references, constant pool addresses, and code labels
11924 compute the address as an offset from the GOT, whose base is in
11925 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11926 differentiate them from global data objects. The returned
11927 address is the PIC reg + an unspec constant.
11929 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11930 reg also appears in the address. */
11933 legitimize_pic_address (rtx orig, rtx reg)
11936 rtx new_rtx = orig;
11940 if (TARGET_MACHO && !TARGET_64BIT)
11943 reg = gen_reg_rtx (Pmode);
11944 /* Use the generic Mach-O PIC machinery. */
11945 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11949 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11951 else if (TARGET_64BIT
11952 && ix86_cmodel != CM_SMALL_PIC
11953 && gotoff_operand (addr, Pmode))
11956 /* This symbol may be referenced via a displacement from the PIC
11957 base address (@GOTOFF). */
11959 if (reload_in_progress)
11960 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11961 if (GET_CODE (addr) == CONST)
11962 addr = XEXP (addr, 0);
11963 if (GET_CODE (addr) == PLUS)
11965 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11967 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11970 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11971 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11973 tmpreg = gen_reg_rtx (Pmode);
11976 emit_move_insn (tmpreg, new_rtx);
11980 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11981 tmpreg, 1, OPTAB_DIRECT);
11984 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11986 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11988 /* This symbol may be referenced via a displacement from the PIC
11989 base address (@GOTOFF). */
11991 if (reload_in_progress)
11992 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11993 if (GET_CODE (addr) == CONST)
11994 addr = XEXP (addr, 0);
11995 if (GET_CODE (addr) == PLUS)
11997 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11999 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12002 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12003 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12004 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12008 emit_move_insn (reg, new_rtx);
12012 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12013 /* We can't use @GOTOFF for text labels on VxWorks;
12014 see gotoff_operand. */
12015 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12017 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12019 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12020 return legitimize_dllimport_symbol (addr, true);
12021 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12022 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12023 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12025 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12026 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12030 /* For x64 PE-COFF there is no GOT table. So we use address
12032 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12034 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12035 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12038 reg = gen_reg_rtx (Pmode);
12039 emit_move_insn (reg, new_rtx);
12042 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12044 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12045 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12046 new_rtx = gen_const_mem (Pmode, new_rtx);
12047 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12050 reg = gen_reg_rtx (Pmode);
12051       /* Use gen_movsi directly, otherwise the address is loaded
12052          into a register for CSE.  We don't want to CSE these addresses;
12053          instead we CSE addresses from the GOT table, so skip this.  */
12054 emit_insn (gen_movsi (reg, new_rtx));
12059 /* This symbol must be referenced via a load from the
12060 Global Offset Table (@GOT). */
12062 if (reload_in_progress)
12063 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12064 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12065 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12067 new_rtx = force_reg (Pmode, new_rtx);
12068 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12069 new_rtx = gen_const_mem (Pmode, new_rtx);
12070 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12073 reg = gen_reg_rtx (Pmode);
12074 emit_move_insn (reg, new_rtx);
12080 if (CONST_INT_P (addr)
12081 && !x86_64_immediate_operand (addr, VOIDmode))
12085 emit_move_insn (reg, addr);
12089 new_rtx = force_reg (Pmode, addr);
12091 else if (GET_CODE (addr) == CONST)
12093 addr = XEXP (addr, 0);
12095 /* We must match stuff we generate before. Assume the only
12096 unspecs that can get here are ours. Not that we could do
12097 anything with them anyway.... */
12098 if (GET_CODE (addr) == UNSPEC
12099 || (GET_CODE (addr) == PLUS
12100 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12102 gcc_assert (GET_CODE (addr) == PLUS);
12104 if (GET_CODE (addr) == PLUS)
12106 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12108 /* Check first to see if this is a constant offset from a @GOTOFF
12109 symbol reference. */
12110 if (gotoff_operand (op0, Pmode)
12111 && CONST_INT_P (op1))
12115 if (reload_in_progress)
12116 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12117 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12119 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12120 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12121 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12125 emit_move_insn (reg, new_rtx);
12131 if (INTVAL (op1) < -16*1024*1024
12132 || INTVAL (op1) >= 16*1024*1024)
12134 if (!x86_64_immediate_operand (op1, Pmode))
12135 op1 = force_reg (Pmode, op1);
12136 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12142 base = legitimize_pic_address (XEXP (addr, 0), reg);
12143 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12144 base == reg ? NULL_RTX : reg);
12146 if (CONST_INT_P (new_rtx))
12147 new_rtx = plus_constant (base, INTVAL (new_rtx));
12150 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12152 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12153 new_rtx = XEXP (new_rtx, 1);
12155 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
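  /* For example, in 32-bit ELF PIC a reference to a global symbol x
     ends up as

	(mem (plus (reg pic) (const (unspec [(symbol_ref x)] UNSPEC_GOT))))

     i.e. a load through the GOT slot, movl x@GOT(%ebx), %reg, while a
     local symbol is instead rewritten into the @GOTOFF sum handled in
     the branches above.  */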
12163 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12166 get_thread_pointer (bool to_reg)
12168 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12170 if (GET_MODE (tp) != Pmode)
12171 tp = convert_to_mode (Pmode, tp, 1);
12174 tp = copy_addr_to_reg (tp);
12179 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12181 static GTY(()) rtx ix86_tls_symbol;
12184 ix86_tls_get_addr (void)
12186 if (!ix86_tls_symbol)
12189 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12190 ? "___tls_get_addr" : "__tls_get_addr");
12192 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12195 return ix86_tls_symbol;
12198 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12200 static GTY(()) rtx ix86_tls_module_base_symbol;
12203 ix86_tls_module_base (void)
12205 if (!ix86_tls_module_base_symbol)
12207 ix86_tls_module_base_symbol
12208 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12210 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12211 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12214 return ix86_tls_module_base_symbol;
12217 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12218 false if we expect this to be used for a memory address and true if
12219 we expect to load the address into a register. */
12222 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12224 rtx dest, base, off;
12225 rtx pic = NULL_RTX, tp = NULL_RTX;
12230 case TLS_MODEL_GLOBAL_DYNAMIC:
12231 dest = gen_reg_rtx (Pmode);
12236 pic = pic_offset_table_rtx;
12239 pic = gen_reg_rtx (Pmode);
12240 emit_insn (gen_set_got (pic));
12244 if (TARGET_GNU2_TLS)
12247 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12249 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12251 tp = get_thread_pointer (true);
12252 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12254 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12258 rtx caddr = ix86_tls_get_addr ();
12262 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12265 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12266 insns = get_insns ();
12269 RTL_CONST_CALL_P (insns) = 1;
12270 emit_libcall_block (insns, dest, rax, x);
12273 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12277 case TLS_MODEL_LOCAL_DYNAMIC:
12278 base = gen_reg_rtx (Pmode);
12283 pic = pic_offset_table_rtx;
12286 pic = gen_reg_rtx (Pmode);
12287 emit_insn (gen_set_got (pic));
12291 if (TARGET_GNU2_TLS)
12293 rtx tmp = ix86_tls_module_base ();
12296 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12298 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12300 tp = get_thread_pointer (true);
12301 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12302 gen_rtx_MINUS (Pmode, tmp, tp));
12306 rtx caddr = ix86_tls_get_addr ();
12310 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12313 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12314 insns = get_insns ();
12317 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12318 share the LD_BASE result with other LD model accesses. */
12319 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12320 UNSPEC_TLS_LD_BASE);
12322 RTL_CONST_CALL_P (insns) = 1;
12323 emit_libcall_block (insns, base, rax, eqv);
12326 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12329 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12330 off = gen_rtx_CONST (Pmode, off);
12332 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12334 if (TARGET_GNU2_TLS)
12336 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12338 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12342 case TLS_MODEL_INITIAL_EXEC:
12345 if (TARGET_SUN_TLS)
12347 /* The Sun linker took the AMD64 TLS spec literally
12348 and can only handle %rax as the destination of the
12349 initial executable code sequence. */
12351 dest = gen_reg_rtx (Pmode);
12352 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12357 type = UNSPEC_GOTNTPOFF;
12361 if (reload_in_progress)
12362 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12363 pic = pic_offset_table_rtx;
12364 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12366 else if (!TARGET_ANY_GNU_TLS)
12368 pic = gen_reg_rtx (Pmode);
12369 emit_insn (gen_set_got (pic));
12370 type = UNSPEC_GOTTPOFF;
12375 type = UNSPEC_INDNTPOFF;
12378 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12379 off = gen_rtx_CONST (Pmode, off);
12381 off = gen_rtx_PLUS (Pmode, pic, off);
12382 off = gen_const_mem (Pmode, off);
12383 set_mem_alias_set (off, ix86_GOT_alias_set ());
12385 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12387 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12388 off = force_reg (Pmode, off);
12389 return gen_rtx_PLUS (Pmode, base, off);
12393 base = get_thread_pointer (true);
12394 dest = gen_reg_rtx (Pmode);
12395 emit_insn (gen_subsi3 (dest, base, off));
12399 case TLS_MODEL_LOCAL_EXEC:
12400 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12401 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12402 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12403 off = gen_rtx_CONST (Pmode, off);
12405 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12407 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12408 return gen_rtx_PLUS (Pmode, base, off);
12412 base = get_thread_pointer (true);
12413 dest = gen_reg_rtx (Pmode);
12414 emit_insn (gen_subsi3 (dest, base, off));
12419 gcc_unreachable ();
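/* A hedged sketch of what the cases above expand to on a 64-bit
   GNU/Linux target (an assumed environment, shown for orientation
   only; the exact insns come from the patterns named above):

     initial-exec:   movq  x@gottpoff(%rip), %rax
                     movq  %fs:(%rax), ...
     local-exec:     movq  %fs:x@tpoff, ...

   The @gottpoff/@tpoff relocations correspond to the UNSPEC_GOTTPOFF
   and UNSPEC_NTPOFF unspecs printed by output_pic_addr_const below.  */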
12425 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12428 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12429 htab_t dllimport_map;
12432 get_dllimport_decl (tree decl)
12434 struct tree_map *h, in;
12437 const char *prefix;
12438 size_t namelen, prefixlen;
12443 if (!dllimport_map)
12444 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12446 in.hash = htab_hash_pointer (decl);
12447 in.base.from = decl;
12448 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12449 h = (struct tree_map *) *loc;
12453 *loc = h = ggc_alloc_tree_map ();
12455 h->base.from = decl;
12456 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12457 VAR_DECL, NULL, ptr_type_node);
12458 DECL_ARTIFICIAL (to) = 1;
12459 DECL_IGNORED_P (to) = 1;
12460 DECL_EXTERNAL (to) = 1;
12461 TREE_READONLY (to) = 1;
12463 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12464 name = targetm.strip_name_encoding (name);
12465 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12466 ? "*__imp_" : "*__imp__";
12467 namelen = strlen (name);
12468 prefixlen = strlen (prefix);
12469 imp_name = (char *) alloca (namelen + prefixlen + 1);
12470 memcpy (imp_name, prefix, prefixlen);
12471 memcpy (imp_name + prefixlen, name, namelen + 1);
12473 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12474 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12475 SET_SYMBOL_REF_DECL (rtl, to);
12476 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12478 rtl = gen_const_mem (Pmode, rtl);
12479 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12481 SET_DECL_RTL (to, rtl);
12482 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12487 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12488 true if we require the result be a register. */
12491 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12496 gcc_assert (SYMBOL_REF_DECL (symbol));
12497 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12499 x = DECL_RTL (imp_decl);
12501 x = force_reg (Pmode, x);
12505 /* Try machine-dependent ways of modifying an illegitimate address
12506 to be legitimate. If we find one, return the new, valid address.
12507 This macro is used in only one place: `memory_address' in explow.c.
12509 OLDX is the address as it was before break_out_memory_refs was called.
12510 In some cases it is useful to look at this to decide what needs to be done.
12512 It is always safe for this macro to do nothing. It exists to recognize
12513 opportunities to optimize the output.
12515 For the 80386, we handle X+REG by loading X into a register R and
12516 using R+REG. R will go in a general reg and indexing will be used.
12517 However, if REG is a broken-out memory address or multiplication,
12518 nothing needs to be done because REG can certainly go in a general reg.
12520 When -fpic is used, special handling is needed for symbolic references.
12521 See comments by legitimize_pic_address in i386.c for details. */
12524 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12525 enum machine_mode mode)
12530 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12532 return legitimize_tls_address (x, (enum tls_model) log, false);
12533 if (GET_CODE (x) == CONST
12534 && GET_CODE (XEXP (x, 0)) == PLUS
12535 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12536 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12538 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12539 (enum tls_model) log, false);
12540 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12543 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12545 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12546 return legitimize_dllimport_symbol (x, true);
12547 if (GET_CODE (x) == CONST
12548 && GET_CODE (XEXP (x, 0)) == PLUS
12549 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12550 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12552 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12553 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12557 if (flag_pic && SYMBOLIC_CONST (x))
12558 return legitimize_pic_address (x, 0);
12561 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12562 return machopic_indirect_data_reference (x, 0);
12565 /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
12566 if (GET_CODE (x) == ASHIFT
12567 && CONST_INT_P (XEXP (x, 1))
12568 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12571 log = INTVAL (XEXP (x, 1));
12572 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12573 GEN_INT (1 << log));
12576 if (GET_CODE (x) == PLUS)
12578 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12580 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12581 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12582 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12585 log = INTVAL (XEXP (XEXP (x, 0), 1));
12586 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12587 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12588 GEN_INT (1 << log));
12591 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12592 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12593 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12596 log = INTVAL (XEXP (XEXP (x, 1), 1));
12597 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12598 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12599 GEN_INT (1 << log));
12602 /* Put multiply first if it isn't already. */
12603 if (GET_CODE (XEXP (x, 1)) == MULT)
12605 rtx tmp = XEXP (x, 0);
12606 XEXP (x, 0) = XEXP (x, 1);
12611 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12612 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12613 created by virtual register instantiation, register elimination, and
12614 similar optimizations. */
12615 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12618 x = gen_rtx_PLUS (Pmode,
12619 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12620 XEXP (XEXP (x, 1), 0)),
12621 XEXP (XEXP (x, 1), 1));
12625 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12626 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12627 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12628 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12629 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12630 && CONSTANT_P (XEXP (x, 1)))
12633 rtx other = NULL_RTX;
12635 if (CONST_INT_P (XEXP (x, 1)))
12637 constant = XEXP (x, 1);
12638 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12640 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12642 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12643 other = XEXP (x, 1);
12651 x = gen_rtx_PLUS (Pmode,
12652 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12653 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12654 plus_constant (other, INTVAL (constant)));
12658 if (changed && ix86_legitimate_address_p (mode, x, false))
12661 if (GET_CODE (XEXP (x, 0)) == MULT)
12664 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12667 if (GET_CODE (XEXP (x, 1)) == MULT)
12670 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12674 && REG_P (XEXP (x, 1))
12675 && REG_P (XEXP (x, 0)))
12678 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12681 x = legitimize_pic_address (x, 0);
12684 if (changed && ix86_legitimate_address_p (mode, x, false))
12687 if (REG_P (XEXP (x, 0)))
12689 rtx temp = gen_reg_rtx (Pmode);
12690 rtx val = force_operand (XEXP (x, 1), temp);
12693 if (GET_MODE (val) != Pmode)
12694 val = convert_to_mode (Pmode, val, 1);
12695 emit_move_insn (temp, val);
12698 XEXP (x, 1) = temp;
12702 else if (REG_P (XEXP (x, 1)))
12704 rtx temp = gen_reg_rtx (Pmode);
12705 rtx val = force_operand (XEXP (x, 0), temp);
12708 if (GET_MODE (val) != Pmode)
12709 val = convert_to_mode (Pmode, val, 1);
12710 emit_move_insn (temp, val);
12713 XEXP (x, 0) = temp;
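/* Worked example (illustrative, not from the original sources): given
   (plus (ashift (reg A) (const_int 3)) (reg B)), the canonicalization
   above rewrites the shift as (mult (reg A) (const_int 8)), yielding
   (plus (mult (reg A) (const_int 8)) (reg B)), which matches the
   index*scale + base form that ix86_decompose_address accepts.  */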
12721 /* Print an integer constant expression in assembler syntax. Addition
12722 and subtraction are the only arithmetic that may appear in these
12723 expressions. FILE is the stdio stream to write to, X is the rtx, and
12724 CODE is the operand print code from the output string. */
12727 output_pic_addr_const (FILE *file, rtx x, int code)
12731 switch (GET_CODE (x))
12734 gcc_assert (flag_pic);
12739 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12740 output_addr_const (file, x);
12743 const char *name = XSTR (x, 0);
12745 /* Mark the decl as referenced so that cgraph will
12746 output the function. */
12747 if (SYMBOL_REF_DECL (x))
12748 mark_decl_referenced (SYMBOL_REF_DECL (x));
12751 if (MACHOPIC_INDIRECT
12752 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12753 name = machopic_indirection_name (x, /*stub_p=*/true);
12755 assemble_name (file, name);
12757 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12758 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12759 fputs ("@PLT", file);
12766 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12767 assemble_name (asm_out_file, buf);
12771 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12775 /* This used to output parentheses around the expression,
12776 but that does not work on the 386 (either ATT or BSD assembler). */
12777 output_pic_addr_const (file, XEXP (x, 0), code);
12781 if (GET_MODE (x) == VOIDmode)
12783 /* We can use %d if the number is <32 bits and positive. */
12784 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12785 fprintf (file, "0x%lx%08lx",
12786 (unsigned long) CONST_DOUBLE_HIGH (x),
12787 (unsigned long) CONST_DOUBLE_LOW (x));
12789 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12792 /* We can't handle floating point constants;
12793 TARGET_PRINT_OPERAND must handle them. */
12794 output_operand_lossage ("floating constant misused");
12798 /* Some assemblers need integer constants to appear first. */
12799 if (CONST_INT_P (XEXP (x, 0)))
12801 output_pic_addr_const (file, XEXP (x, 0), code);
12803 output_pic_addr_const (file, XEXP (x, 1), code);
12807 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12808 output_pic_addr_const (file, XEXP (x, 1), code);
12810 output_pic_addr_const (file, XEXP (x, 0), code);
12816 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12817 output_pic_addr_const (file, XEXP (x, 0), code);
12819 output_pic_addr_const (file, XEXP (x, 1), code);
12821 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12825 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12827 bool f = i386_asm_output_addr_const_extra (file, x);
12832 gcc_assert (XVECLEN (x, 0) == 1);
12833 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12834 switch (XINT (x, 1))
12837 fputs ("@GOT", file);
12839 case UNSPEC_GOTOFF:
12840 fputs ("@GOTOFF", file);
12842 case UNSPEC_PLTOFF:
12843 fputs ("@PLTOFF", file);
12846 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12847 "(%rip)" : "[rip]", file);
12849 case UNSPEC_GOTPCREL:
12850 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12851 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12853 case UNSPEC_GOTTPOFF:
12854 /* FIXME: This might be @TPOFF in Sun ld too. */
12855 fputs ("@gottpoff", file);
12858 fputs ("@tpoff", file);
12860 case UNSPEC_NTPOFF:
12862 fputs ("@tpoff", file);
12864 fputs ("@ntpoff", file);
12866 case UNSPEC_DTPOFF:
12867 fputs ("@dtpoff", file);
12869 case UNSPEC_GOTNTPOFF:
12871 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12872 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12874 fputs ("@gotntpoff", file);
12876 case UNSPEC_INDNTPOFF:
12877 fputs ("@indntpoff", file);
12880 case UNSPEC_MACHOPIC_OFFSET:
12882 machopic_output_function_base_name (file);
12886 output_operand_lossage ("invalid UNSPEC as operand");
12892 output_operand_lossage ("invalid expression as operand");
12896 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12897 We need to emit DTP-relative relocations. */
12899 static void ATTRIBUTE_UNUSED
12900 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12902 fputs (ASM_LONG, file);
12903 output_addr_const (file, x);
12904 fputs ("@dtpoff", file);
12910 fputs (", 0", file);
12913 gcc_unreachable ();
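/* For orientation (assuming ASM_LONG is ".long" on the target), the
   function above emits something like

       .long  foo@dtpoff         # size 4
       .long  foo@dtpoff, 0      # size 8, with a zero upper half

   for a DTP-relative reference to foo.  */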
12917 /* Return true if X is a representation of the PIC register. This copes
12918 with calls from ix86_find_base_term, where the register might have
12919 been replaced by a cselib value. */
12922 ix86_pic_register_p (rtx x)
12924 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12925 return (pic_offset_table_rtx
12926 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12928 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12931 /* Helper function for ix86_delegitimize_address.
12932 Attempt to delegitimize TLS local-exec accesses. */
12935 ix86_delegitimize_tls_address (rtx orig_x)
12937 rtx x = orig_x, unspec;
12938 struct ix86_address addr;
12940 if (!TARGET_TLS_DIRECT_SEG_REFS)
12944 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12946 if (ix86_decompose_address (x, &addr) == 0
12947 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12948 || addr.disp == NULL_RTX
12949 || GET_CODE (addr.disp) != CONST)
12951 unspec = XEXP (addr.disp, 0);
12952 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12953 unspec = XEXP (unspec, 0);
12954 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12956 x = XVECEXP (unspec, 0, 0);
12957 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12958 if (unspec != XEXP (addr.disp, 0))
12959 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12962 rtx idx = addr.index;
12963 if (addr.scale != 1)
12964 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12965 x = gen_rtx_PLUS (Pmode, idx, x);
12968 x = gen_rtx_PLUS (Pmode, addr.base, x);
12969 if (MEM_P (orig_x))
12970 x = replace_equiv_address_nv (orig_x, x);
12974 /* In the name of slightly smaller debug output, and to cater to
12975 general assembler lossage, recognize PIC+GOTOFF and turn it back
12976 into a direct symbol reference.
12978 On Darwin, this is necessary to avoid a crash, because Darwin
12979 has a different PIC label for each routine but the DWARF debugging
12980 information is not associated with any particular routine, so it's
12981 necessary to remove references to the PIC label from RTL stored by
12982 the DWARF output code. */
12985 ix86_delegitimize_address (rtx x)
12987 rtx orig_x = delegitimize_mem_from_attrs (x);
12988 /* addend is NULL or some rtx if x is something+GOTOFF where
12989 something doesn't include the PIC register. */
12990 rtx addend = NULL_RTX;
12991 /* reg_addend is NULL or a multiple of some register. */
12992 rtx reg_addend = NULL_RTX;
12993 /* const_addend is NULL or a const_int. */
12994 rtx const_addend = NULL_RTX;
12995 /* This is the result, or NULL. */
12996 rtx result = NULL_RTX;
13005 if (GET_CODE (x) != CONST
13006 || GET_CODE (XEXP (x, 0)) != UNSPEC
13007 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13008 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13009 || !MEM_P (orig_x))
13010 return ix86_delegitimize_tls_address (orig_x);
13011 x = XVECEXP (XEXP (x, 0), 0, 0);
13012 if (GET_MODE (orig_x) != GET_MODE (x))
13014 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13022 if (GET_CODE (x) != PLUS
13023 || GET_CODE (XEXP (x, 1)) != CONST)
13024 return ix86_delegitimize_tls_address (orig_x);
13026 if (ix86_pic_register_p (XEXP (x, 0)))
13027 /* %ebx + GOT/GOTOFF */
13029 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13031 /* %ebx + %reg * scale + GOT/GOTOFF */
13032 reg_addend = XEXP (x, 0);
13033 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13034 reg_addend = XEXP (reg_addend, 1);
13035 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13036 reg_addend = XEXP (reg_addend, 0);
13039 reg_addend = NULL_RTX;
13040 addend = XEXP (x, 0);
13044 addend = XEXP (x, 0);
13046 x = XEXP (XEXP (x, 1), 0);
13047 if (GET_CODE (x) == PLUS
13048 && CONST_INT_P (XEXP (x, 1)))
13050 const_addend = XEXP (x, 1);
13054 if (GET_CODE (x) == UNSPEC
13055 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13056 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13057 result = XVECEXP (x, 0, 0);
13059 if (TARGET_MACHO && darwin_local_data_pic (x)
13060 && !MEM_P (orig_x))
13061 result = XVECEXP (x, 0, 0);
13064 return ix86_delegitimize_tls_address (orig_x);
13067 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13069 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13072 /* If the rest of original X doesn't involve the PIC register, add
13073 addend and subtract pic_offset_table_rtx. This can happen e.g.
13075 leal (%ebx, %ecx, 4), %ecx
13077 movl foo@GOTOFF(%ecx), %edx
13078 in which case we return (%ecx - %ebx) + foo. */
13079 if (pic_offset_table_rtx)
13080 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13081 pic_offset_table_rtx),
13086 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13088 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13089 if (result == NULL_RTX)
13095 /* If X is a machine specific address (i.e. a symbol or label being
13096 referenced as a displacement from the GOT implemented using an
13097 UNSPEC), then return the base term. Otherwise return X. */
13100 ix86_find_base_term (rtx x)
13106 if (GET_CODE (x) != CONST)
13108 term = XEXP (x, 0);
13109 if (GET_CODE (term) == PLUS
13110 && (CONST_INT_P (XEXP (term, 1))
13111 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13112 term = XEXP (term, 0);
13113 if (GET_CODE (term) != UNSPEC
13114 || (XINT (term, 1) != UNSPEC_GOTPCREL
13115 && XINT (term, 1) != UNSPEC_PCREL))
13118 return XVECEXP (term, 0, 0);
13121 return ix86_delegitimize_address (x);
13125 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13126 int fp, FILE *file)
13128 const char *suffix;
13130 if (mode == CCFPmode || mode == CCFPUmode)
13132 code = ix86_fp_compare_code_to_integer (code);
13136 code = reverse_condition (code);
13187 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13191 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13192 Those same assemblers have the same but opposite lossage on cmov. */
13193 if (mode == CCmode)
13194 suffix = fp ? "nbe" : "a";
13195 else if (mode == CCCmode)
13198 gcc_unreachable ();
13214 gcc_unreachable ();
13218 gcc_assert (mode == CCmode || mode == CCCmode);
13235 gcc_unreachable ();
13239 /* ??? As above. */
13240 gcc_assert (mode == CCmode || mode == CCCmode);
13241 suffix = fp ? "nb" : "ae";
13244 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13248 /* ??? As above. */
13249 if (mode == CCmode)
13251 else if (mode == CCCmode)
13252 suffix = fp ? "nb" : "ae";
13254 gcc_unreachable ();
13257 suffix = fp ? "u" : "p";
13260 suffix = fp ? "nu" : "np";
13263 gcc_unreachable ();
13265 fputs (suffix, file);
13268 /* Print the name of register X to FILE based on its machine mode and number.
13269 If CODE is 'w', pretend the mode is HImode.
13270 If CODE is 'b', pretend the mode is QImode.
13271 If CODE is 'k', pretend the mode is SImode.
13272 If CODE is 'q', pretend the mode is DImode.
13273 If CODE is 'x', pretend the mode is V4SFmode.
13274 If CODE is 't', pretend the mode is V8SFmode.
13275 If CODE is 'h', pretend the reg is the 'high' byte register.
13276 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
13277 If CODE is 'd', duplicate the operand for an AVX instruction.
13281 print_reg (rtx x, int code, FILE *file)
13284 bool duplicated = code == 'd' && TARGET_AVX;
13286 gcc_assert (x == pc_rtx
13287 || (REGNO (x) != ARG_POINTER_REGNUM
13288 && REGNO (x) != FRAME_POINTER_REGNUM
13289 && REGNO (x) != FLAGS_REG
13290 && REGNO (x) != FPSR_REG
13291 && REGNO (x) != FPCR_REG));
13293 if (ASSEMBLER_DIALECT == ASM_ATT)
13298 gcc_assert (TARGET_64BIT);
13299 fputs ("rip", file);
13303 if (code == 'w' || MMX_REG_P (x))
13305 else if (code == 'b')
13307 else if (code == 'k')
13309 else if (code == 'q')
13311 else if (code == 'y')
13313 else if (code == 'h')
13315 else if (code == 'x')
13317 else if (code == 't')
13320 code = GET_MODE_SIZE (GET_MODE (x));
13322 /* Irritatingly, AMD extended registers use a different naming convention
13323 from the normal registers.  */
13324 if (REX_INT_REG_P (x))
13326 gcc_assert (TARGET_64BIT);
13330 error ("extended registers have no high halves");
13333 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13336 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13339 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13342 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13345 error ("unsupported operand size for extended register");
13355 if (STACK_TOP_P (x))
13364 if (! ANY_FP_REG_P (x))
13365 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13370 reg = hi_reg_name[REGNO (x)];
13373 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13375 reg = qi_reg_name[REGNO (x)];
13378 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13380 reg = qi_high_reg_name[REGNO (x)];
13385 gcc_assert (!duplicated);
13387 fputs (hi_reg_name[REGNO (x)] + 1, file);
13392 gcc_unreachable ();
13398 if (ASSEMBLER_DIALECT == ASM_ATT)
13399 fprintf (file, ", %%%s", reg);
13401 fprintf (file, ", %s", reg);
13405 /* Locate some local-dynamic symbol still in use by this function
13406 so that we can print its name in some tls_local_dynamic_base
13410 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13414 if (GET_CODE (x) == SYMBOL_REF
13415 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13417 cfun->machine->some_ld_name = XSTR (x, 0);
13424 static const char *
13425 get_some_local_dynamic_name (void)
13429 if (cfun->machine->some_ld_name)
13430 return cfun->machine->some_ld_name;
13432 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13433 if (NONDEBUG_INSN_P (insn)
13434 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13435 return cfun->machine->some_ld_name;
13440 /* Meaning of CODE:
13441 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13442 C -- print opcode suffix for set/cmov insn.
13443 c -- like C, but print reversed condition
13444 F,f -- likewise, but for floating-point.
13445 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13447 R -- print the prefix for register names.
13448 z -- print the opcode suffix for the size of the current operand.
13449 Z -- likewise, with special suffixes for x87 instructions.
13450 * -- print a star (in certain assembler syntax)
13451 A -- print an absolute memory reference.
13452 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13453 s -- print a shift double count, followed by the assembler's argument
13455 b -- print the QImode name of the register for the indicated operand.
13456 %b0 would print %al if operands[0] is reg 0.
13457 w -- likewise, print the HImode name of the register.
13458 k -- likewise, print the SImode name of the register.
13459 q -- likewise, print the DImode name of the register.
13460 x -- likewise, print the V4SFmode name of the register.
13461 t -- likewise, print the V8SFmode name of the register.
13462 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13463 y -- print "st(0)" instead of "st" as a register.
13464 d -- print duplicated register operand for AVX instruction.
13465 D -- print condition for SSE cmp instruction.
13466 P -- if PIC, print an @PLT suffix.
13467 p -- print raw symbol name.
13468 X -- don't print any sort of PIC '@' suffix for a symbol.
13469 & -- print some in-use local-dynamic symbol name.
13470 H -- print a memory address offset by 8; used for sse high-parts
13471 Y -- print condition for XOP pcom* instruction.
13472 + -- print a branch hint as 'cs' or 'ds' prefix
13473 ; -- print a semicolon (after prefixes due to a bug in older gas).
13474 @ -- print the segment register of a thread base pointer load
13478 ix86_print_operand (FILE *file, rtx x, int code)
13485 if (ASSEMBLER_DIALECT == ASM_ATT)
13491 const char *name = get_some_local_dynamic_name ();
13493 output_operand_lossage ("'%%&' used without any "
13494 "local dynamic TLS references");
13496 assemble_name (file, name);
13501 switch (ASSEMBLER_DIALECT)
13508 /* Intel syntax. For absolute addresses, registers should not
13509 be surrounded by braces. */
13513 ix86_print_operand (file, x, 0);
13520 gcc_unreachable ();
13523 ix86_print_operand (file, x, 0);
13528 if (ASSEMBLER_DIALECT == ASM_ATT)
13533 if (ASSEMBLER_DIALECT == ASM_ATT)
13538 if (ASSEMBLER_DIALECT == ASM_ATT)
13543 if (ASSEMBLER_DIALECT == ASM_ATT)
13548 if (ASSEMBLER_DIALECT == ASM_ATT)
13553 if (ASSEMBLER_DIALECT == ASM_ATT)
13558 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13560 /* Opcodes don't get size suffixes if using Intel opcodes. */
13561 if (ASSEMBLER_DIALECT == ASM_INTEL)
13564 switch (GET_MODE_SIZE (GET_MODE (x)))
13583 output_operand_lossage
13584 ("invalid operand size for operand code '%c'", code);
13589 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13591 (0, "non-integer operand used with operand code '%c'", code);
13595 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13596 if (ASSEMBLER_DIALECT == ASM_INTEL)
13599 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13601 switch (GET_MODE_SIZE (GET_MODE (x)))
13604 #ifdef HAVE_AS_IX86_FILDS
13614 #ifdef HAVE_AS_IX86_FILDQ
13617 fputs ("ll", file);
13625 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13627 /* 387 opcodes don't get size suffixes
13628 if the operands are registers. */
13629 if (STACK_REG_P (x))
13632 switch (GET_MODE_SIZE (GET_MODE (x)))
13653 output_operand_lossage
13654 ("invalid operand type used with operand code '%c'", code);
13658 output_operand_lossage
13659 ("invalid operand size for operand code '%c'", code);
13677 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13679 ix86_print_operand (file, x, 0);
13680 fputs (", ", file);
13685 /* Little bit of braindamage here. The SSE compare instructions
13686 use completely different names for the comparisons than the
13687 fp conditional moves do.  */
13690 switch (GET_CODE (x))
13693 fputs ("eq", file);
13696 fputs ("eq_us", file);
13699 fputs ("lt", file);
13702 fputs ("nge", file);
13705 fputs ("le", file);
13708 fputs ("ngt", file);
13711 fputs ("unord", file);
13714 fputs ("neq", file);
13717 fputs ("neq_oq", file);
13720 fputs ("ge", file);
13723 fputs ("nlt", file);
13726 fputs ("gt", file);
13729 fputs ("nle", file);
13732 fputs ("ord", file);
13735 output_operand_lossage ("operand is not a condition code, "
13736 "invalid operand code 'D'");
13742 switch (GET_CODE (x))
13746 fputs ("eq", file);
13750 fputs ("lt", file);
13754 fputs ("le", file);
13757 fputs ("unord", file);
13761 fputs ("neq", file);
13765 fputs ("nlt", file);
13769 fputs ("nle", file);
13772 fputs ("ord", file);
13775 output_operand_lossage ("operand is not a condition code, "
13776 "invalid operand code 'D'");
13782 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13783 if (ASSEMBLER_DIALECT == ASM_ATT)
13785 switch (GET_MODE (x))
13787 case HImode: putc ('w', file); break;
13789 case SFmode: putc ('l', file); break;
13791 case DFmode: putc ('q', file); break;
13792 default: gcc_unreachable ();
13799 if (!COMPARISON_P (x))
13801 output_operand_lossage ("operand is neither a constant nor a "
13802 "condition code, invalid operand code "
13806 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13809 if (!COMPARISON_P (x))
13811 output_operand_lossage ("operand is neither a constant nor a "
13812 "condition code, invalid operand code "
13816 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13817 if (ASSEMBLER_DIALECT == ASM_ATT)
13820 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13823 /* Like above, but reverse condition */
13825 /* Check to see if argument to %c is really a constant
13826 and not a condition code which needs to be reversed. */
13827 if (!COMPARISON_P (x))
13829 output_operand_lossage ("operand is neither a constant nor a "
13830 "condition code, invalid operand "
13834 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13837 if (!COMPARISON_P (x))
13839 output_operand_lossage ("operand is neither a constant nor a "
13840 "condition code, invalid operand "
13844 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13845 if (ASSEMBLER_DIALECT == ASM_ATT)
13848 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13852 /* It doesn't actually matter what mode we use here, as we're
13853 only going to use this for printing. */
13854 x = adjust_address_nv (x, DImode, 8);
13862 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13865 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13868 int pred_val = INTVAL (XEXP (x, 0));
13870 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13871 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13873 int taken = pred_val > REG_BR_PROB_BASE / 2;
13874 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13876 /* Emit hints only in the cases where the default branch prediction
13877 heuristics would fail.  */
13878 if (taken != cputaken)
13880 /* We use 3e (DS) prefix for taken branches and
13881 2e (CS) prefix for not taken branches. */
13883 fputs ("ds ; ", file);
13885 fputs ("cs ; ", file);
13893 switch (GET_CODE (x))
13896 fputs ("neq", file);
13899 fputs ("eq", file);
13903 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13907 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13911 fputs ("le", file);
13915 fputs ("lt", file);
13918 fputs ("unord", file);
13921 fputs ("ord", file);
13924 fputs ("ueq", file);
13927 fputs ("nlt", file);
13930 fputs ("nle", file);
13933 fputs ("ule", file);
13936 fputs ("ult", file);
13939 fputs ("une", file);
13942 output_operand_lossage ("operand is not a condition code, "
13943 "invalid operand code 'Y'");
13949 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13955 if (ASSEMBLER_DIALECT == ASM_ATT)
13958 /* The kernel uses a different segment register for performance
13959 reasons; a system call would not have to trash the userspace
13960 segment register, which would be expensive. */
13961 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13962 fputs ("fs", file);
13964 fputs ("gs", file);
13968 output_operand_lossage ("invalid operand code '%c'", code);
13973 print_reg (x, code, file);
13975 else if (MEM_P (x))
13977 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13978 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13979 && GET_MODE (x) != BLKmode)
13982 switch (GET_MODE_SIZE (GET_MODE (x)))
13984 case 1: size = "BYTE"; break;
13985 case 2: size = "WORD"; break;
13986 case 4: size = "DWORD"; break;
13987 case 8: size = "QWORD"; break;
13988 case 12: size = "TBYTE"; break;
13990 if (GET_MODE (x) == XFmode)
13995 case 32: size = "YMMWORD"; break;
13997 gcc_unreachable ();
14000 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14003 else if (code == 'w')
14005 else if (code == 'k')
14008 fputs (size, file);
14009 fputs (" PTR ", file);
14013 /* Avoid (%rip) for call operands. */
14014 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14015 && !CONST_INT_P (x))
14016 output_addr_const (file, x);
14017 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14018 output_operand_lossage ("invalid constraints for operand");
14020 output_address (x);
14023 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14028 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14029 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14031 if (ASSEMBLER_DIALECT == ASM_ATT)
14033 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14035 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14037 fprintf (file, "0x%08x", (unsigned int) l);
14040 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14045 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14046 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14048 if (ASSEMBLER_DIALECT == ASM_ATT)
14050 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14053 /* These float cases don't actually occur as immediate operands. */
14054 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14058 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14059 fputs (dstr, file);
14064 /* We have patterns that allow zero sets of memory, for instance.
14065 In 64-bit mode, we should probably support all 8-byte vectors,
14066 since we can in fact encode that into an immediate. */
14067 if (GET_CODE (x) == CONST_VECTOR)
14069 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14073 if (code != 'P' && code != 'p')
14075 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14077 if (ASSEMBLER_DIALECT == ASM_ATT)
14080 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14081 || GET_CODE (x) == LABEL_REF)
14083 if (ASSEMBLER_DIALECT == ASM_ATT)
14086 fputs ("OFFSET FLAT:", file);
14089 if (CONST_INT_P (x))
14090 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14091 else if (flag_pic || MACHOPIC_INDIRECT)
14092 output_pic_addr_const (file, x, code);
14094 output_addr_const (file, x);
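/* Usage note for the operand codes handled above (illustrative): they
   appear inside insn templates elsewhere in this file; e.g.
   "fistp%Z0\t%0" in output_fix_trunc below uses 'Z' to append the x87
   size suffix for operand 0's mode, and "fucomip\t{%y1, %0|%0, %y1}"
   uses 'y' to print the stack top as "st(0)" rather than bare "st".  */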
14099 ix86_print_operand_punct_valid_p (unsigned char code)
14101 return (code == '@' || code == '*' || code == '+'
14102 || code == '&' || code == ';');
14105 /* Print a memory operand whose address is ADDR. */
14108 ix86_print_operand_address (FILE *file, rtx addr)
14110 struct ix86_address parts;
14111 rtx base, index, disp;
14113 int ok = ix86_decompose_address (addr, &parts);
14117 if (parts.base && GET_CODE (parts.base) == SUBREG)
14119 rtx tmp = SUBREG_REG (parts.base);
14120 parts.base = simplify_subreg (GET_MODE (parts.base),
14121 tmp, GET_MODE (tmp), 0);
14124 if (parts.index && GET_CODE (parts.index) == SUBREG)
14126 rtx tmp = SUBREG_REG (parts.index);
14127 parts.index = simplify_subreg (GET_MODE (parts.index),
14128 tmp, GET_MODE (tmp), 0);
14132 index = parts.index;
14134 scale = parts.scale;
14142 if (ASSEMBLER_DIALECT == ASM_ATT)
14144 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14147 gcc_unreachable ();
14150 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14151 if (TARGET_64BIT && !base && !index)
14155 if (GET_CODE (disp) == CONST
14156 && GET_CODE (XEXP (disp, 0)) == PLUS
14157 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14158 symbol = XEXP (XEXP (disp, 0), 0);
14160 if (GET_CODE (symbol) == LABEL_REF
14161 || (GET_CODE (symbol) == SYMBOL_REF
14162 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14165 if (!base && !index)
14167 /* Displacement only requires special attention. */
14169 if (CONST_INT_P (disp))
14171 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14172 fputs ("ds:", file);
14173 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14176 output_pic_addr_const (file, disp, 0);
14178 output_addr_const (file, disp);
14184 /* Print SImode registers for zero-extended addresses to force
14185 addr32 prefix. Otherwise print DImode registers to avoid it. */
14187 code = ((GET_CODE (addr) == ZERO_EXTEND
14188 || GET_CODE (addr) == AND)
14192 if (ASSEMBLER_DIALECT == ASM_ATT)
14197 output_pic_addr_const (file, disp, 0);
14198 else if (GET_CODE (disp) == LABEL_REF)
14199 output_asm_label (disp);
14201 output_addr_const (file, disp);
14206 print_reg (base, code, file);
14210 print_reg (index, code, file);
14212 fprintf (file, ",%d", scale);
14218 rtx offset = NULL_RTX;
14222 /* Pull out the offset of a symbol; print any symbol itself. */
14223 if (GET_CODE (disp) == CONST
14224 && GET_CODE (XEXP (disp, 0)) == PLUS
14225 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14227 offset = XEXP (XEXP (disp, 0), 1);
14228 disp = gen_rtx_CONST (VOIDmode,
14229 XEXP (XEXP (disp, 0), 0));
14233 output_pic_addr_const (file, disp, 0);
14234 else if (GET_CODE (disp) == LABEL_REF)
14235 output_asm_label (disp);
14236 else if (CONST_INT_P (disp))
14239 output_addr_const (file, disp);
14245 print_reg (base, code, file);
14248 if (INTVAL (offset) >= 0)
14250 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14254 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14261 print_reg (index, code, file);
14263 fprintf (file, "*%d", scale);
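/* Example output from the two branches above (illustrative): the same
   address prints as -4(%ebp,%ecx,4) in AT&T syntax and as
   [ebp+ecx*4-4] in Intel syntax; only the layout differs.  */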
14270 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14273 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14277 if (GET_CODE (x) != UNSPEC)
14280 op = XVECEXP (x, 0, 0);
14281 switch (XINT (x, 1))
14283 case UNSPEC_GOTTPOFF:
14284 output_addr_const (file, op);
14285 /* FIXME: This might be @TPOFF in Sun ld. */
14286 fputs ("@gottpoff", file);
14289 output_addr_const (file, op);
14290 fputs ("@tpoff", file);
14292 case UNSPEC_NTPOFF:
14293 output_addr_const (file, op);
14295 fputs ("@tpoff", file);
14297 fputs ("@ntpoff", file);
14299 case UNSPEC_DTPOFF:
14300 output_addr_const (file, op);
14301 fputs ("@dtpoff", file);
14303 case UNSPEC_GOTNTPOFF:
14304 output_addr_const (file, op);
14306 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14307 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14309 fputs ("@gotntpoff", file);
14311 case UNSPEC_INDNTPOFF:
14312 output_addr_const (file, op);
14313 fputs ("@indntpoff", file);
14316 case UNSPEC_MACHOPIC_OFFSET:
14317 output_addr_const (file, op);
14319 machopic_output_function_base_name (file);
14323 case UNSPEC_STACK_CHECK:
14327 gcc_assert (flag_split_stack);
14329 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14330 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14332 gcc_unreachable ();
14335 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14346 /* Split one or more double-mode RTL references into pairs of half-mode
14347 references. The RTL can be REG, offsettable MEM, integer constant, or
14348 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14349 split and "num" is its length. lo_half and hi_half are output arrays
14350 that parallel "operands". */
14353 split_double_mode (enum machine_mode mode, rtx operands[],
14354 int num, rtx lo_half[], rtx hi_half[])
14356 enum machine_mode half_mode;
14362 half_mode = DImode;
14365 half_mode = SImode;
14368 gcc_unreachable ();
14371 byte = GET_MODE_SIZE (half_mode);
14375 rtx op = operands[num];
14377 /* simplify_subreg refuses to split volatile memory addresses,
14378 but we still have to handle them.  */
14381 lo_half[num] = adjust_address (op, half_mode, 0);
14382 hi_half[num] = adjust_address (op, half_mode, byte);
14386 lo_half[num] = simplify_gen_subreg (half_mode, op,
14387 GET_MODE (op) == VOIDmode
14388 ? mode : GET_MODE (op), 0);
14389 hi_half[num] = simplify_gen_subreg (half_mode, op,
14390 GET_MODE (op) == VOIDmode
14391 ? mode : GET_MODE (op), byte);
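/* Example (illustrative): splitting a DImode MEM at address A on a
   32-bit target yields two SImode MEMs, the low half at A and the
   high half at A+4 (byte == GET_MODE_SIZE (SImode)); registers and
   constants go through simplify_gen_subreg instead.  */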
14396 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14397 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14398 is the expression of the binary operation. The output may either be
14399 emitted here, or returned to the caller, like all output_* functions.
14401 There is no guarantee that the operands are the same mode, as they
14402 might be within FLOAT or FLOAT_EXTEND expressions. */
14404 #ifndef SYSV386_COMPAT
14405 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14406 wants to fix the assemblers because that causes incompatibility
14407 with gcc. No-one wants to fix gcc because that causes
14408 incompatibility with assemblers... You can use the option of
14409 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14410 #define SYSV386_COMPAT 1
14414 output_387_binary_op (rtx insn, rtx *operands)
14416 static char buf[40];
14419 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14421 #ifdef ENABLE_CHECKING
14422 /* Even if we do not want to check the inputs, this documents input
14423 constraints. Which helps in understanding the following code. */
14424 if (STACK_REG_P (operands[0])
14425 && ((REG_P (operands[1])
14426 && REGNO (operands[0]) == REGNO (operands[1])
14427 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14428 || (REG_P (operands[2])
14429 && REGNO (operands[0]) == REGNO (operands[2])
14430 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14431 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14434 gcc_assert (is_sse);
14437 switch (GET_CODE (operands[3]))
14440 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14441 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14449 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14450 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14458 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14459 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14467 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14468 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14476 gcc_unreachable ();
14483 strcpy (buf, ssep);
14484 if (GET_MODE (operands[0]) == SFmode)
14485 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14487 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14491 strcpy (buf, ssep + 1);
14492 if (GET_MODE (operands[0]) == SFmode)
14493 strcat (buf, "ss\t{%2, %0|%0, %2}");
14495 strcat (buf, "sd\t{%2, %0|%0, %2}");
14501 switch (GET_CODE (operands[3]))
14505 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14507 rtx temp = operands[2];
14508 operands[2] = operands[1];
14509 operands[1] = temp;
14512 /* We now know operands[0] == operands[1].  */
14514 if (MEM_P (operands[2]))
14520 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14522 if (STACK_TOP_P (operands[0]))
14523 /* How is it that we are storing to a dead operand[2]?
14524 Well, presumably operands[1] is dead too. We can't
14525 store the result to st(0) as st(0) gets popped on this
14526 instruction. Instead store to operands[2] (which I
14527 think has to be st(1)). st(1) will be popped later.
14528 gcc <= 2.8.1 didn't have this check and generated
14529 assembly code that the Unixware assembler rejected. */
14530 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14532 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14536 if (STACK_TOP_P (operands[0]))
14537 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14539 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14544 if (MEM_P (operands[1]))
14550 if (MEM_P (operands[2]))
14556 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14559 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14560 derived assemblers, confusingly reverse the direction of
14561 the operation for fsub{r} and fdiv{r} when the
14562 destination register is not st(0). The Intel assembler
14563 doesn't have this brain damage. Read !SYSV386_COMPAT to
14564 figure out what the hardware really does. */
14565 if (STACK_TOP_P (operands[0]))
14566 p = "{p\t%0, %2|rp\t%2, %0}";
14568 p = "{rp\t%2, %0|p\t%0, %2}";
14570 if (STACK_TOP_P (operands[0]))
14571 /* As above for fmul/fadd, we can't store to st(0). */
14572 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14574 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14579 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14582 if (STACK_TOP_P (operands[0]))
14583 p = "{rp\t%0, %1|p\t%1, %0}";
14585 p = "{p\t%1, %0|rp\t%0, %1}";
14587 if (STACK_TOP_P (operands[0]))
14588 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14590 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14595 if (STACK_TOP_P (operands[0]))
14597 if (STACK_TOP_P (operands[1]))
14598 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14600 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14603 else if (STACK_TOP_P (operands[1]))
14606 p = "{\t%1, %0|r\t%0, %1}";
14608 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14614 p = "{r\t%2, %0|\t%0, %2}";
14616 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14622 gcc_unreachable ();
14629 /* Return needed mode for entity in optimize_mode_switching pass. */
14632 ix86_mode_needed (int entity, rtx insn)
14634 enum attr_i387_cw mode;
14636 /* The mode UNINITIALIZED is used to store the control word after a
14637 function call or ASM pattern. The mode ANY specifies that the function
14638 has no requirements on the control word and makes no changes in the
14639 bits we are interested in.  */
14642 || (NONJUMP_INSN_P (insn)
14643 && (asm_noperands (PATTERN (insn)) >= 0
14644 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14645 return I387_CW_UNINITIALIZED;
14647 if (recog_memoized (insn) < 0)
14648 return I387_CW_ANY;
14650 mode = get_attr_i387_cw (insn);
14655 if (mode == I387_CW_TRUNC)
14660 if (mode == I387_CW_FLOOR)
14665 if (mode == I387_CW_CEIL)
14670 if (mode == I387_CW_MASK_PM)
14675 gcc_unreachable ();
14678 return I387_CW_ANY;
14681 /* Output code to initialize control word copies used by trunc?f?i and
14682 rounding patterns. CURRENT_MODE is set to the current control word,
14683 while NEW_MODE is set to the new control word.  */
14686 emit_i387_cw_initialization (int mode)
14688 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14691 enum ix86_stack_slot slot;
14693 rtx reg = gen_reg_rtx (HImode);
14695 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14696 emit_move_insn (reg, copy_rtx (stored_mode));
14698 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14699 || optimize_function_for_size_p (cfun))
14703 case I387_CW_TRUNC:
14704 /* round toward zero (truncate) */
14705 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14706 slot = SLOT_CW_TRUNC;
14709 case I387_CW_FLOOR:
14710 /* round down toward -oo */
14711 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14712 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14713 slot = SLOT_CW_FLOOR;
14717 /* round up toward +oo */
14718 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14719 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14720 slot = SLOT_CW_CEIL;
14723 case I387_CW_MASK_PM:
14724 /* mask precision exception for nearbyint() */
14725 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14726 slot = SLOT_CW_MASK_PM;
14730 gcc_unreachable ();
14737 case I387_CW_TRUNC:
14738 /* round toward zero (truncate) */
14739 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14740 slot = SLOT_CW_TRUNC;
14743 case I387_CW_FLOOR:
14744 /* round down toward -oo */
14745 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14746 slot = SLOT_CW_FLOOR;
14750 /* round up toward +oo */
14751 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14752 slot = SLOT_CW_CEIL;
14755 case I387_CW_MASK_PM:
14756 /* mask precision exception for nearbyint() */
14757 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14758 slot = SLOT_CW_MASK_PM;
14762 gcc_unreachable ();
14766 gcc_assert (slot < MAX_386_STACK_LOCALS);
14768 new_mode = assign_386_stack_local (HImode, slot);
14769 emit_move_insn (new_mode, reg);
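/* Background for the magic constants above (standard x87 facts, noted
   here for the reader): bits 10-11 of the control word select the
   rounding mode -- 0x0000 nearest, 0x0400 down, 0x0800 up, 0x0c00
   toward zero -- and bit 5 (0x0020) masks the precision exception,
   which is what the nearbyint() variant needs.  */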
14772 /* Output code for INSN to convert a float to a signed int. OPERANDS
14773 are the insn operands. The output may be [HSD]Imode and the input
14774 operand may be [SDX]Fmode. */
14777 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14779 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14780 int dimode_p = GET_MODE (operands[0]) == DImode;
14781 int round_mode = get_attr_i387_cw (insn);
14783 /* Jump through a hoop or two for DImode, since the hardware has no
14784 non-popping instruction. We used to do this a different way, but
14785 that was somewhat fragile and broke with post-reload splitters. */
14786 if ((dimode_p || fisttp) && !stack_top_dies)
14787 output_asm_insn ("fld\t%y1", operands);
14789 gcc_assert (STACK_TOP_P (operands[1]));
14790 gcc_assert (MEM_P (operands[0]));
14791 gcc_assert (GET_MODE (operands[1]) != TFmode);
14794 output_asm_insn ("fisttp%Z0\t%0", operands);
14797 if (round_mode != I387_CW_ANY)
14798 output_asm_insn ("fldcw\t%3", operands);
14799 if (stack_top_dies || dimode_p)
14800 output_asm_insn ("fistp%Z0\t%0", operands);
14802 output_asm_insn ("fist%Z0\t%0", operands);
14803 if (round_mode != I387_CW_ANY)
14804 output_asm_insn ("fldcw\t%2", operands);
14810 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14811 have the values zero or one, indicates the ffreep insn's operand
14812 from the OPERANDS array. */
14814 static const char *
14815 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14817 if (TARGET_USE_FFREEP)
14818 #ifdef HAVE_AS_IX86_FFREEP
14819 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14822 static char retval[32];
14823 int regno = REGNO (operands[opno]);
14825 gcc_assert (FP_REGNO_P (regno));
14827 regno -= FIRST_STACK_REG;
14829 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14834 return opno ? "fstp\t%y1" : "fstp\t%y0";
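/* Note (illustrative): the ".short 0xc%ddf" fallback above emits the
   raw little-endian encoding of ffreep %st(N) -- bytes 0xdf 0xc0+N --
   for assemblers built without HAVE_AS_IX86_FFREEP support.  */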
14838 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14839 should be used. UNORDERED_P is true when fucom should be used. */
14842 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14844 int stack_top_dies;
14845 rtx cmp_op0, cmp_op1;
14846 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14850 cmp_op0 = operands[0];
14851 cmp_op1 = operands[1];
14855 cmp_op0 = operands[1];
14856 cmp_op1 = operands[2];
14861 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14862 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14863 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14864 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14866 if (GET_MODE (operands[0]) == SFmode)
14868 return &ucomiss[TARGET_AVX ? 0 : 1];
14870 return &comiss[TARGET_AVX ? 0 : 1];
14873 return &ucomisd[TARGET_AVX ? 0 : 1];
14875 return &comisd[TARGET_AVX ? 0 : 1];
14878 gcc_assert (STACK_TOP_P (cmp_op0));
14880 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14882 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14884 if (stack_top_dies)
14886 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14887 return output_387_ffreep (operands, 1);
14890 return "ftst\n\tfnstsw\t%0";
14893 if (STACK_REG_P (cmp_op1)
14895 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14896 && REGNO (cmp_op1) != FIRST_STACK_REG)
14898 /* If the top of the 387 stack dies, and the other operand
14899 is also a stack register that dies, then this must be a
14900 `fcompp' float compare.  */
14904 /* There is no double popping fcomi variant. Fortunately,
14905 eflags is immune from the fstp's cc clobbering. */
14907 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14909 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14910 return output_387_ffreep (operands, 0);
14915 return "fucompp\n\tfnstsw\t%0";
14917 return "fcompp\n\tfnstsw\t%0";
14922 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14924 static const char * const alt[16] =
14926 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14927 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14928 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14929 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14931 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14932 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14936 "fcomi\t{%y1, %0|%0, %y1}",
14937 "fcomip\t{%y1, %0|%0, %y1}",
14938 "fucomi\t{%y1, %0|%0, %y1}",
14939 "fucomip\t{%y1, %0|%0, %y1}",
14950 mask = eflags_p << 3;
14951 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14952 mask |= unordered_p << 1;
14953 mask |= stack_top_dies;
14955 gcc_assert (mask < 16);
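/* Example (illustrative): fucomip -- eflags_p = 1, a floating-point
   cmp_op1, unordered_p = 1, stack top dies -- gives mask 8|2|1 = 11,
   selecting "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */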
14964 ix86_output_addr_vec_elt (FILE *file, int value)
14966 const char *directive = ASM_LONG;
14970 directive = ASM_QUAD;
14972 gcc_assert (!TARGET_64BIT);
14975 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14979 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14981 const char *directive = ASM_LONG;
14984 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14985 directive = ASM_QUAD;
14987 gcc_assert (!TARGET_64BIT);
14989 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14990 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14991 fprintf (file, "%s%s%d-%s%d\n",
14992 directive, LPREFIX, value, LPREFIX, rel);
14993 else if (HAVE_AS_GOTOFF_IN_DATA)
14994 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14996 else if (TARGET_MACHO)
14998 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14999 machopic_output_function_base_name (file);
15004 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15005 GOT_SYMBOL_NAME, LPREFIX, value);
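/* Example entries (illustrative, assuming LPREFIX is ".L" and ASM_LONG
   is ".long"): for value 3 relative to label 1 the branches above emit,
   respectively,

       .long  .L3-.L1                          # 64-bit / VxWorks
       .long  .L3@GOTOFF                       # @GOTOFF allowed in data
       .long  _GLOBAL_OFFSET_TABLE_+[.-.L3]    # fallback  */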
15008 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15012 ix86_expand_clear (rtx dest)
15016 /* We play register width games, which are only valid after reload. */
15017 gcc_assert (reload_completed);
15019 /* Avoid HImode and its attendant prefix byte. */
15020 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15021 dest = gen_rtx_REG (SImode, REGNO (dest));
15022 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15024 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15025 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15027 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15028 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
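/* Design note (illustrative): the xor form assembles to e.g.
   "xorl %eax, %eax", which is shorter than "movl $0, %eax" but
   clobbers the flags -- hence the explicit CLOBBER of FLAGS_REG
   in the PARALLEL built above.  */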
15034 /* X is an unchanging MEM. If it is a constant pool reference, return
15035 the constant pool rtx, else NULL. */
15038 maybe_get_pool_constant (rtx x)
15040 x = ix86_delegitimize_address (XEXP (x, 0));
15042 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15043 return get_pool_constant (x);
15049 ix86_expand_move (enum machine_mode mode, rtx operands[])
15052 enum tls_model model;
15057 if (GET_CODE (op1) == SYMBOL_REF)
15059 model = SYMBOL_REF_TLS_MODEL (op1);
15062 op1 = legitimize_tls_address (op1, model, true);
15063 op1 = force_operand (op1, op0);
15066 if (GET_MODE (op1) != mode)
15067 op1 = convert_to_mode (mode, op1, 1);
15069 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15070 && SYMBOL_REF_DLLIMPORT_P (op1))
15071 op1 = legitimize_dllimport_symbol (op1, false);
15073 else if (GET_CODE (op1) == CONST
15074 && GET_CODE (XEXP (op1, 0)) == PLUS
15075 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15077 rtx addend = XEXP (XEXP (op1, 0), 1);
15078 rtx symbol = XEXP (XEXP (op1, 0), 0);
15081 model = SYMBOL_REF_TLS_MODEL (symbol);
15083 tmp = legitimize_tls_address (symbol, model, true);
15084 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15085 && SYMBOL_REF_DLLIMPORT_P (symbol))
15086 tmp = legitimize_dllimport_symbol (symbol, true);
15090 tmp = force_operand (tmp, NULL);
15091 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15092 op0, 1, OPTAB_DIRECT);
15095 if (GET_MODE (tmp) != mode)
15096 op1 = convert_to_mode (mode, tmp, 1);
15100 if ((flag_pic || MACHOPIC_INDIRECT)
15101 && symbolic_operand (op1, mode))
15103 if (TARGET_MACHO && !TARGET_64BIT)
15106 /* dynamic-no-pic */
15107 if (MACHOPIC_INDIRECT)
15109 rtx temp = ((reload_in_progress
15110 || ((op0 && REG_P (op0))
15112 ? op0 : gen_reg_rtx (Pmode));
15113 op1 = machopic_indirect_data_reference (op1, temp);
15115 op1 = machopic_legitimize_pic_address (op1, mode,
15116 temp == op1 ? 0 : temp);
15118 if (op0 != op1 && GET_CODE (op0) != MEM)
15120 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15124 if (GET_CODE (op0) == MEM)
15125 op1 = force_reg (Pmode, op1);
15129 if (GET_CODE (temp) != REG)
15130 temp = gen_reg_rtx (Pmode);
15131 temp = legitimize_pic_address (op1, temp);
15136 /* dynamic-no-pic */
15142 op1 = force_reg (mode, op1);
15143 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15145 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15146 op1 = legitimize_pic_address (op1, reg);
15149 if (GET_MODE (op1) != mode)
15150 op1 = convert_to_mode (mode, op1, 1);
15157 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15158 || !push_operand (op0, mode))
15160 op1 = force_reg (mode, op1);
15162 if (push_operand (op0, mode)
15163 && ! general_no_elim_operand (op1, mode))
15164 op1 = copy_to_mode_reg (mode, op1);
15166 /* Force large constants in 64bit compilation into a register
15167 so that they get CSEd. */
15168 if (can_create_pseudo_p ()
15169 && (mode == DImode) && TARGET_64BIT
15170 && immediate_operand (op1, mode)
15171 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15172 && !register_operand (op0, mode)
15174 op1 = copy_to_mode_reg (mode, op1);
15176 if (can_create_pseudo_p ()
15177 && FLOAT_MODE_P (mode)
15178 && GET_CODE (op1) == CONST_DOUBLE)
15180 /* If we are loading a floating point constant to a register,
15181 force the value to memory now, since we'll get better code
15182 out of the back end. */
15184 op1 = validize_mem (force_const_mem (mode, op1));
15185 if (!register_operand (op0, mode))
15187 rtx temp = gen_reg_rtx (mode);
15188 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15189 emit_move_insn (op0, temp);
15195 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15199 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15201 rtx op0 = operands[0], op1 = operands[1];
15202 unsigned int align = GET_MODE_ALIGNMENT (mode);
15204 /* Force constants other than zero into memory. We do not know how
15205 the instructions used to build constants modify the upper 64 bits
15206 of the register; once we have that information we may be able
15207 to handle some of them more efficiently. */
15208 if (can_create_pseudo_p ()
15209 && register_operand (op0, mode)
15210 && (CONSTANT_P (op1)
15211 || (GET_CODE (op1) == SUBREG
15212 && CONSTANT_P (SUBREG_REG (op1))))
15213 && !standard_sse_constant_p (op1))
15214 op1 = validize_mem (force_const_mem (mode, op1));
15216 /* We need to check memory alignment for SSE mode since an attribute
15217 can make operands unaligned. */
15218 if (can_create_pseudo_p ()
15219 && SSE_REG_MODE_P (mode)
15220 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15221 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15225 /* ix86_expand_vector_move_misalign() does not like constants ... */
15226 if (CONSTANT_P (op1)
15227 || (GET_CODE (op1) == SUBREG
15228 && CONSTANT_P (SUBREG_REG (op1))))
15229 op1 = validize_mem (force_const_mem (mode, op1));
15231 /* ... nor both arguments in memory. */
15232 if (!register_operand (op0, mode)
15233 && !register_operand (op1, mode))
15234 op1 = force_reg (mode, op1);
15236 tmp[0] = op0; tmp[1] = op1;
15237 ix86_expand_vector_move_misalign (mode, tmp);
15241 /* Make operand1 a register if it isn't already. */
15242 if (can_create_pseudo_p ()
15243 && !register_operand (op0, mode)
15244 && !register_operand (op1, mode))
15246 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15250 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15253 /* Split 32-byte AVX unaligned load and store if needed. */
15256 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15259 rtx (*extract) (rtx, rtx, rtx);
15260 rtx (*move_unaligned) (rtx, rtx);
15261 enum machine_mode mode;
15263 switch (GET_MODE (op0))
15266 gcc_unreachable ();
15268 extract = gen_avx_vextractf128v32qi;
15269 move_unaligned = gen_avx_movdqu256;
15273 extract = gen_avx_vextractf128v8sf;
15274 move_unaligned = gen_avx_movups256;
15278 extract = gen_avx_vextractf128v4df;
15279 move_unaligned = gen_avx_movupd256;
15284 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15286 rtx r = gen_reg_rtx (mode);
15287 m = adjust_address (op1, mode, 0);
15288 emit_move_insn (r, m);
15289 m = adjust_address (op1, mode, 16);
15290 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15291 emit_move_insn (op0, r);
15293 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15295 m = adjust_address (op0, mode, 0);
15296 emit_insn (extract (m, op1, const0_rtx));
15297 m = adjust_address (op0, mode, 16);
15298 emit_insn (extract (m, op1, const1_rtx));
15301 emit_insn (move_unaligned (op0, op1));
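/* Illustrative sketch of the split forms (assumed operands; gas syntax):

   load, with TARGET_AVX256_SPLIT_UNALIGNED_LOAD:
     vmovups      (mem), %xmm0
     vinsertf128  $1, 16(mem), %ymm0, %ymm0

   store, with TARGET_AVX256_SPLIT_UNALIGNED_STORE:
     vextractf128 $0, %ymm0, (mem)
     vextractf128 $1, %ymm0, 16(mem)

   otherwise a single unaligned 256-bit vmovups/vmovupd/vmovdqu is used.  */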
15304 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15305 straight to ix86_expand_vector_move. */
15306 /* Code generation for scalar reg-reg moves of single and double precision data:
15307 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
        movaps reg, reg
      else
        movss reg, reg
15311 if (x86_sse_partial_reg_dependency == true)
        movapd reg, reg
      else
        movsd reg, reg
15316 Code generation for scalar loads of double precision data:
15317 if (x86_sse_split_regs == true)
15318 movlpd mem, reg (gas syntax)
      else
        movsd mem, reg
15322 Code generation for unaligned packed loads of single precision data
15323 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15324 if (x86_sse_unaligned_move_optimal)
        movups mem, reg
15327 if (x86_sse_partial_reg_dependency == true)
        { xorps reg, reg; movlps mem, reg; movhps mem+8, reg }
      else
        { movlps mem, reg; movhps mem+8, reg }
15339 Code generation for unaligned packed loads of double precision data
15340 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15341 if (x86_sse_unaligned_move_optimal)
        movupd mem, reg
15344 if (x86_sse_split_regs == true)
        { movlpd mem, reg; movhpd mem+8, reg }
      else
        { movsd mem, reg; movhpd mem+8, reg } */
15357 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15366 switch (GET_MODE_CLASS (mode))
15368 case MODE_VECTOR_INT:
15370 switch (GET_MODE_SIZE (mode))
15373 /* If we're optimizing for size, movups is the smallest. */
15374 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15376 op0 = gen_lowpart (V4SFmode, op0);
15377 op1 = gen_lowpart (V4SFmode, op1);
15378 emit_insn (gen_sse_movups (op0, op1));
15381 op0 = gen_lowpart (V16QImode, op0);
15382 op1 = gen_lowpart (V16QImode, op1);
15383 emit_insn (gen_sse2_movdqu (op0, op1));
15386 op0 = gen_lowpart (V32QImode, op0);
15387 op1 = gen_lowpart (V32QImode, op1);
15388 ix86_avx256_split_vector_move_misalign (op0, op1);
15391 gcc_unreachable ();
15394 case MODE_VECTOR_FLOAT:
15395 op0 = gen_lowpart (mode, op0);
15396 op1 = gen_lowpart (mode, op1);
15401 emit_insn (gen_sse_movups (op0, op1));
15404 ix86_avx256_split_vector_move_misalign (op0, op1);
15407 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15409 op0 = gen_lowpart (V4SFmode, op0);
15410 op1 = gen_lowpart (V4SFmode, op1);
15411 emit_insn (gen_sse_movups (op0, op1));
15414 emit_insn (gen_sse2_movupd (op0, op1));
15417 ix86_avx256_split_vector_move_misalign (op0, op1);
15420 gcc_unreachable ();
15425 gcc_unreachable ();
15433 /* If we're optimizing for size, movups is the smallest. */
15434 if (optimize_insn_for_size_p ()
15435 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15437 op0 = gen_lowpart (V4SFmode, op0);
15438 op1 = gen_lowpart (V4SFmode, op1);
15439 emit_insn (gen_sse_movups (op0, op1));
15443 /* ??? If we have typed data, then it would appear that using
15444 movdqu is the only way to get unaligned data loaded with integer type. */
15446 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15448 op0 = gen_lowpart (V16QImode, op0);
15449 op1 = gen_lowpart (V16QImode, op1);
15450 emit_insn (gen_sse2_movdqu (op0, op1));
15454 if (TARGET_SSE2 && mode == V2DFmode)
15458 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15460 op0 = gen_lowpart (V2DFmode, op0);
15461 op1 = gen_lowpart (V2DFmode, op1);
15462 emit_insn (gen_sse2_movupd (op0, op1));
15466 /* When SSE registers are split into halves, we can avoid
15467 writing to the top half twice. */
15468 if (TARGET_SSE_SPLIT_REGS)
15470 emit_clobber (op0);
15475 /* ??? Not sure about the best option for the Intel chips.
15476 The following would seem to satisfy; the register is
15477 entirely cleared, breaking the dependency chain. We
15478 then store to the upper half, with a dependency depth
15479 of one. A rumor has it that Intel recommends two movsd
15480 followed by an unpacklpd, but this is unconfirmed. And
15481 given that the dependency depth of the unpacklpd would
15482 still be one, I'm not sure why this would be better. */
15483 zero = CONST0_RTX (V2DFmode);
15486 m = adjust_address (op1, DFmode, 0);
15487 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15488 m = adjust_address (op1, DFmode, 8);
15489 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15493 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15495 op0 = gen_lowpart (V4SFmode, op0);
15496 op1 = gen_lowpart (V4SFmode, op1);
15497 emit_insn (gen_sse_movups (op0, op1));
15501 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15502 emit_move_insn (op0, CONST0_RTX (mode));
15504 emit_clobber (op0);
15506 if (mode != V4SFmode)
15507 op0 = gen_lowpart (V4SFmode, op0);
15508 m = adjust_address (op1, V2SFmode, 0);
15509 emit_insn (gen_sse_loadlps (op0, op0, m));
15510 m = adjust_address (op1, V2SFmode, 8);
15511 emit_insn (gen_sse_loadhps (op0, op0, m));
15514 else if (MEM_P (op0))
15516 /* If we're optimizing for size, movups is the smallest. */
15517 if (optimize_insn_for_size_p ()
15518 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15520 op0 = gen_lowpart (V4SFmode, op0);
15521 op1 = gen_lowpart (V4SFmode, op1);
15522 emit_insn (gen_sse_movups (op0, op1));
15526 /* ??? Similar to above, only less clear because of quote
15527 typeless stores unquote. */
15528 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15529 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15531 op0 = gen_lowpart (V16QImode, op0);
15532 op1 = gen_lowpart (V16QImode, op1);
15533 emit_insn (gen_sse2_movdqu (op0, op1));
15537 if (TARGET_SSE2 && mode == V2DFmode)
15539 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15541 op0 = gen_lowpart (V2DFmode, op0);
15542 op1 = gen_lowpart (V2DFmode, op1);
15543 emit_insn (gen_sse2_movupd (op0, op1));
15547 m = adjust_address (op0, DFmode, 0);
15548 emit_insn (gen_sse2_storelpd (m, op1));
15549 m = adjust_address (op0, DFmode, 8);
15550 emit_insn (gen_sse2_storehpd (m, op1));
15555 if (mode != V4SFmode)
15556 op1 = gen_lowpart (V4SFmode, op1);
15558 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15560 op0 = gen_lowpart (V4SFmode, op0);
15561 emit_insn (gen_sse_movups (op0, op1));
15565 m = adjust_address (op0, V2SFmode, 0);
15566 emit_insn (gen_sse_storelps (m, op1));
15567 m = adjust_address (op0, V2SFmode, 8);
15568 emit_insn (gen_sse_storehps (m, op1));
15573 gcc_unreachable ();
15576 /* Expand a push in MODE. This is some mode for which we do not support
15577 proper push instructions, at least from the registers that we expect
15578 the value to live in. */
15581 ix86_expand_push (enum machine_mode mode, rtx x)
15585 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15586 GEN_INT (-GET_MODE_SIZE (mode)),
15587 stack_pointer_rtx, 1, OPTAB_DIRECT);
15588 if (tmp != stack_pointer_rtx)
15589 emit_move_insn (stack_pointer_rtx, tmp);
15591 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15593 /* When we push an operand onto stack, it has to be aligned at least
15594 at the function argument boundary. However since we don't have
15595 the argument type, we can't determine the actual argument boundary. */
15597 emit_move_insn (tmp, x);
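/* Illustrative sketch for a 16-byte mode (assumed; the exact moves
   depend on the mode and target):

     subl   $16, %esp
     movups %xmm0, (%esp)

   i.e. an explicit stack-pointer adjustment followed by an ordinary
   store, since no real push instruction exists for such modes.  */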
15600 /* Helper function of ix86_fixup_binary_operands to canonicalize
15601 operand order. Returns true if the operands should be swapped. */
15604 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15607 rtx dst = operands[0];
15608 rtx src1 = operands[1];
15609 rtx src2 = operands[2];
15611 /* If the operation is not commutative, we can't do anything. */
15612 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15615 /* Highest priority is that src1 should match dst. */
15616 if (rtx_equal_p (dst, src1))
15618 if (rtx_equal_p (dst, src2))
15621 /* Next highest priority is that immediate constants come second. */
15622 if (immediate_operand (src2, mode))
15624 if (immediate_operand (src1, mode))
15627 /* Lowest priority is that memory references should come second. */
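/* Example of the canonicalization (commutative PLUS, assumed pseudos):
   a = b + a is swapped to a = a + b so that src1 matches dst;
   a = 5 + b is swapped to a = b + 5 so that the immediate comes
   second, as the insn predicates and constraints expect.  */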
15637 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15638 destination to use for the operation. If different from the true
15639 destination in operands[0], a copy operation will be required. */
15642 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15645 rtx dst = operands[0];
15646 rtx src1 = operands[1];
15647 rtx src2 = operands[2];
15649 /* Canonicalize operand order. */
15650 if (ix86_swap_binary_operands_p (code, mode, operands))
15654 /* It is invalid to swap operands of different modes. */
15655 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15662 /* Both source operands cannot be in memory. */
15663 if (MEM_P (src1) && MEM_P (src2))
15665 /* Optimization: Only read from memory once. */
15666 if (rtx_equal_p (src1, src2))
15668 src2 = force_reg (mode, src2);
15672 src2 = force_reg (mode, src2);
15675 /* If the destination is memory, and we do not have matching source
15676 operands, do things in registers. */
15677 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15678 dst = gen_reg_rtx (mode);
15680 /* Source 1 cannot be a constant. */
15681 if (CONSTANT_P (src1))
15682 src1 = force_reg (mode, src1);
15684 /* Source 1 cannot be a non-matching memory. */
15685 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15686 src1 = force_reg (mode, src1);
15688 operands[1] = src1;
15689 operands[2] = src2;
15693 /* Similarly, but assume that the destination has already been
15694 set up properly. */
15697 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15698 enum machine_mode mode, rtx operands[])
15700 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15701 gcc_assert (dst == operands[0]);
15704 /* Attempt to expand a binary operator. Make the expansion closer to the
15705 actual machine than just general_operand, which will allow 3 separate
15706 memory references (one output, two input) in a single insn. */
15709 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15712 rtx src1, src2, dst, op, clob;
15714 dst = ix86_fixup_binary_operands (code, mode, operands);
15715 src1 = operands[1];
15716 src2 = operands[2];
15718 /* Emit the instruction. */
15720 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15721 if (reload_in_progress)
15723 /* Reload doesn't know about the flags register, and doesn't know that
15724 it doesn't want to clobber it. We can only do this with PLUS. */
15725 gcc_assert (code == PLUS);
15728 else if (reload_completed
15730 && !rtx_equal_p (dst, src1))
15732 /* This is going to be an LEA; avoid splitting it later. */
15737 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15738 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15741 /* Fix up the destination if needed. */
15742 if (dst != operands[0])
15743 emit_move_insn (operands[0], dst);
15746 /* Return TRUE or FALSE depending on whether the binary operator meets the
15747 appropriate constraints. */
15750 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15753 rtx dst = operands[0];
15754 rtx src1 = operands[1];
15755 rtx src2 = operands[2];
15757 /* Both source operands cannot be in memory. */
15758 if (MEM_P (src1) && MEM_P (src2))
15761 /* Canonicalize operand order for commutative operators. */
15762 if (ix86_swap_binary_operands_p (code, mode, operands))
15769 /* If the destination is memory, we must have a matching source operand. */
15770 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15773 /* Source 1 cannot be a constant. */
15774 if (CONSTANT_P (src1))
15777 /* Source 1 cannot be a non-matching memory. */
15778 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15780 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15781 return (code == AND
15784 || (TARGET_64BIT && mode == DImode))
15785 && CONST_INT_P (src2)
15786 && (INTVAL (src2) == 0xff
15787 || INTVAL (src2) == 0xffff));
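/* Example of the zero-extend special case above: the insn
     (set (reg:SI ax) (and:SI (mem:SI ...) (const_int 255)))
   is accepted even though the memory source does not match the
   destination, because it can be emitted as a zero-extending load
   (movzbl mem, %eax) rather than a real and.  */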
15793 /* Attempt to expand a unary operator. Make the expansion closer to the
15794 actual machine than just general_operand, which will allow 2 separate
15795 memory references (one output, one input) in a single insn. */
15798 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15801 int matching_memory;
15802 rtx src, dst, op, clob;
15807 /* If the destination is memory, and we do not have matching source
15808 operands, do things in registers. */
15809 matching_memory = 0;
15812 if (rtx_equal_p (dst, src))
15813 matching_memory = 1;
15815 dst = gen_reg_rtx (mode);
15818 /* When source operand is memory, destination must match. */
15819 if (MEM_P (src) && !matching_memory)
15820 src = force_reg (mode, src);
15822 /* Emit the instruction. */
15824 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15825 if (reload_in_progress || code == NOT)
15827 /* Reload doesn't know about the flags register, and doesn't know that
15828 it doesn't want to clobber it. */
15829 gcc_assert (code == NOT);
15834 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15835 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15838 /* Fix up the destination if needed. */
15839 if (dst != operands[0])
15840 emit_move_insn (operands[0], dst);
15843 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15844 divisor are within the range [0-255]. */
15847 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15850 rtx end_label, qimode_label;
15851 rtx insn, div, mod;
15852 rtx scratch, tmp0, tmp1, tmp2;
15853 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15854 rtx (*gen_zero_extend) (rtx, rtx);
15855 rtx (*gen_test_ccno_1) (rtx, rtx);
15860 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15861 gen_test_ccno_1 = gen_testsi_ccno_1;
15862 gen_zero_extend = gen_zero_extendqisi2;
15865 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15866 gen_test_ccno_1 = gen_testdi_ccno_1;
15867 gen_zero_extend = gen_zero_extendqidi2;
15870 gcc_unreachable ();
15873 end_label = gen_label_rtx ();
15874 qimode_label = gen_label_rtx ();
15876 scratch = gen_reg_rtx (mode);
15878 /* Use 8bit unsigned divmod if dividend and divisor are within
15879 the range [0-255]. */
15880 emit_move_insn (scratch, operands[2]);
15881 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15882 scratch, 1, OPTAB_DIRECT);
15883 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15884 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15885 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15886 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15887 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15889 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15890 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15891 JUMP_LABEL (insn) = qimode_label;
15893 /* Generate the original signed/unsigned divmod. */
15894 div = gen_divmod4_1 (operands[0], operands[1],
15895 operands[2], operands[3]);
15898 /* Branch to the end. */
15899 emit_jump_insn (gen_jump (end_label));
15902 /* Generate 8bit unsigned divide. */
15903 emit_label (qimode_label);
15904 /* Don't use operands[0] for result of 8bit divide since not all
15905 registers support QImode ZERO_EXTRACT. */
15906 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15907 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15908 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15909 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15913 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15914 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15918 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15919 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15922 /* Extract remainder from AH. */
15923 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15924 if (REG_P (operands[1]))
15925 insn = emit_move_insn (operands[1], tmp1);
15928 /* Need a new scratch register since the old one has the result of the 8bit divide. */
15930 scratch = gen_reg_rtx (mode);
15931 emit_move_insn (scratch, tmp1);
15932 insn = emit_move_insn (operands[1], scratch);
15934 set_unique_reg_note (insn, REG_EQUAL, mod);
15936 /* Zero extend quotient from AL. */
15937 tmp1 = gen_lowpart (QImode, tmp0);
15938 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15939 set_unique_reg_note (insn, REG_EQUAL, div);
15941 emit_label (end_label);
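/* Illustrative sketch of the emitted sequence for a 32-bit divide
   (assumed register assignment; gas syntax):

       movl  op2, scratch
       orl   op3, scratch
       testl $0xffffff00, scratch   # do both operands fit in 8 bits?
       je    .Lqimode
       ...                          # full 32-bit div/idiv
       jmp   .Lend
   .Lqimode:
       divb  ...                    # quotient in %al, remainder in %ah
       ...                          # zero-extend %al/%ah into the results
   .Lend:                                                              */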
15944 #define LEA_SEARCH_THRESHOLD 12
15946 /* Search backward for non-agu definition of register number REGNO1
15947 or register number REGNO2 in INSN's basic block until
15948 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15949 2. Reach BB boundary, or
15950 3. Reach agu definition.
15951 Returns the distance between the non-agu definition point and INSN.
15952 If no definition point, returns -1. */
15955 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15958 basic_block bb = BLOCK_FOR_INSN (insn);
15961 enum attr_type insn_type;
15963 if (insn != BB_HEAD (bb))
15965 rtx prev = PREV_INSN (insn);
15966 while (prev && distance < LEA_SEARCH_THRESHOLD)
15968 if (NONDEBUG_INSN_P (prev))
15971 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15972 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15973 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15974 && (regno1 == DF_REF_REGNO (*def_rec)
15975 || regno2 == DF_REF_REGNO (*def_rec)))
15977 insn_type = get_attr_type (prev);
15978 if (insn_type != TYPE_LEA)
15982 if (prev == BB_HEAD (bb))
15984 prev = PREV_INSN (prev);
15988 if (distance < LEA_SEARCH_THRESHOLD)
15992 bool simple_loop = false;
15994 FOR_EACH_EDGE (e, ei, bb->preds)
15997 simple_loop = true;
16003 rtx prev = BB_END (bb);
16006 && distance < LEA_SEARCH_THRESHOLD)
16008 if (NONDEBUG_INSN_P (prev))
16011 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16012 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16013 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16014 && (regno1 == DF_REF_REGNO (*def_rec)
16015 || regno2 == DF_REF_REGNO (*def_rec)))
16017 insn_type = get_attr_type (prev);
16018 if (insn_type != TYPE_LEA)
16022 prev = PREV_INSN (prev);
16030 /* get_attr_type may modify recog data. We want to make sure
16031 that recog data is valid for instruction INSN, on which
16032 distance_non_agu_define is called. INSN is unchanged here. */
16033 extract_insn_cached (insn);
16037 /* Return the distance between INSN and the next insn that uses
16038 register number REGNO0 in a memory address. Return -1 if no such
16039 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16042 distance_agu_use (unsigned int regno0, rtx insn)
16044 basic_block bb = BLOCK_FOR_INSN (insn);
16049 if (insn != BB_END (bb))
16051 rtx next = NEXT_INSN (insn);
16052 while (next && distance < LEA_SEARCH_THRESHOLD)
16054 if (NONDEBUG_INSN_P (next))
16058 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16059 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16060 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16061 && regno0 == DF_REF_REGNO (*use_rec))
16063 /* Return DISTANCE if OP0 is used in memory
16064 address in NEXT. */
16068 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16069 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16070 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16071 && regno0 == DF_REF_REGNO (*def_rec))
16073 /* Return -1 if OP0 is set in NEXT. */
16077 if (next == BB_END (bb))
16079 next = NEXT_INSN (next);
16083 if (distance < LEA_SEARCH_THRESHOLD)
16087 bool simple_loop = false;
16089 FOR_EACH_EDGE (e, ei, bb->succs)
16092 simple_loop = true;
16098 rtx next = BB_HEAD (bb);
16101 && distance < LEA_SEARCH_THRESHOLD)
16103 if (NONDEBUG_INSN_P (next))
16107 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16108 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16109 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16110 && regno0 == DF_REF_REGNO (*use_rec))
16112 /* Return DISTANCE if OP0 is used in memory
16113 address in NEXT. */
16117 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16118 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16119 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16120 && regno0 == DF_REF_REGNO (*def_rec))
16122 /* Return -1 if OP0 is set in NEXT. */
16127 next = NEXT_INSN (next);
16135 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16136 there is a choice between LEA and ADD.
16137 Negative value: ADD is preferred over LEA
      Zero: ADD and LEA have the same priority
16139 Positive value: LEA is preferred over ADD */
16140 #define IX86_LEA_PRIORITY 2
16142 /* Return true if it is ok to optimize an ADD operation to LEA
16143 operation to avoid flag register consumption. For most processors,
16144 ADD is faster than LEA. For processors like ATOM, if the
16145 destination register of LEA holds an actual address which will be
16146 used soon, LEA is better and otherwise ADD is better. */
16149 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16151 unsigned int regno0 = true_regnum (operands[0]);
16152 unsigned int regno1 = true_regnum (operands[1]);
16153 unsigned int regno2 = true_regnum (operands[2]);
16155 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16156 if (regno0 != regno1 && regno0 != regno2)
16159 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16163 int dist_define, dist_use;
16165 /* Return false if REGNO0 isn't used in a memory address. */
16166 dist_use = distance_agu_use (regno0, insn);
16170 dist_define = distance_non_agu_define (regno1, regno2, insn);
16171 if (dist_define <= 0)
16174 /* If this insn has both backward non-agu dependence and forward
16175 agu dependence, the one with the shorter distance takes effect. */
16176 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
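/* Worked example of the heuristic (assumed insns):

     lea  (%ecx,%edx), %eax        # the candidate add/lea
     mov  (%eax), %ebx             # %eax feeds an address soon

   Here distance_agu_use is small, so on TARGET_OPT_AGU chips the lea
   form is kept and its result is forwarded directly to the AGU; if
   %eax only fed ALU operations, add would win instead.  */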
16183 /* Return true if destination reg of SET_BODY is shift count of USE_BODY. */
16187 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16193 /* Retrieve destination of SET_BODY. */
16194 switch (GET_CODE (set_body))
16197 set_dest = SET_DEST (set_body);
16198 if (!set_dest || !REG_P (set_dest))
16202 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16203 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16211 /* Retrieve shift count of USE_BODY. */
16212 switch (GET_CODE (use_body))
16215 shift_rtx = XEXP (use_body, 1);
16218 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16219 if (ix86_dep_by_shift_count_body (set_body,
16220 XVECEXP (use_body, 0, i)))
16228 && (GET_CODE (shift_rtx) == ASHIFT
16229 || GET_CODE (shift_rtx) == LSHIFTRT
16230 || GET_CODE (shift_rtx) == ASHIFTRT
16231 || GET_CODE (shift_rtx) == ROTATE
16232 || GET_CODE (shift_rtx) == ROTATERT))
16234 rtx shift_count = XEXP (shift_rtx, 1);
16236 /* Return true if shift count is dest of SET_BODY. */
16237 if (REG_P (shift_count)
16238 && true_regnum (set_dest) == true_regnum (shift_count))
16245 /* Return true if destination reg of SET_INSN is shift count of USE_INSN. */
16249 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16251 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16252 PATTERN (use_insn));
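/* Example of the dependence detected here (assumed insns):

     set insn:  movl %eax, %ecx    # defines %ecx
     use insn:  sall %cl, %edx     # shift count lives in %cl

   true_regnum of the SET destination equals that of the shift count,
   so the predicate returns true.  */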
16255 /* Return TRUE or FALSE depending on whether the unary operator meets the
16256 appropriate constraints. */
16259 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16260 enum machine_mode mode ATTRIBUTE_UNUSED,
16261 rtx operands[2] ATTRIBUTE_UNUSED)
16263 /* If one of the operands is memory, source and destination must match. */
16264 if ((MEM_P (operands[0])
16265 || MEM_P (operands[1]))
16266 && ! rtx_equal_p (operands[0], operands[1]))
16271 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16272 are ok, keeping in mind the possible movddup alternative. */
16275 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16277 if (MEM_P (operands[0]))
16278 return rtx_equal_p (operands[0], operands[1 + high]);
16279 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16280 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16284 /* Post-reload splitter for converting an SF or DFmode value in an
16285 SSE register into an unsigned SImode. */
16288 ix86_split_convert_uns_si_sse (rtx operands[])
16290 enum machine_mode vecmode;
16291 rtx value, large, zero_or_two31, input, two31, x;
16293 large = operands[1];
16294 zero_or_two31 = operands[2];
16295 input = operands[3];
16296 two31 = operands[4];
16297 vecmode = GET_MODE (large);
16298 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16300 /* Load up the value into the low element. We must ensure that the other
16301 elements are valid floats -- zero is the easiest such value. */
16304 if (vecmode == V4SFmode)
16305 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16307 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16311 input = gen_rtx_REG (vecmode, REGNO (input));
16312 emit_move_insn (value, CONST0_RTX (vecmode));
16313 if (vecmode == V4SFmode)
16314 emit_insn (gen_sse_movss (value, value, input));
16316 emit_insn (gen_sse2_movsd (value, value, input));
16319 emit_move_insn (large, two31);
16320 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16322 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16323 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16325 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16326 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16328 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16329 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16331 large = gen_rtx_REG (V4SImode, REGNO (large));
16332 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16334 x = gen_rtx_REG (V4SImode, REGNO (value));
16335 if (vecmode == V4SFmode)
16336 emit_insn (gen_sse2_cvttps2dq (x, value));
16338 emit_insn (gen_sse2_cvttpd2dq (x, value));
16341 emit_insn (gen_xorv4si3 (value, value, large));
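/* Worked example of the split: for x = 3e9 (>= 2^31) the LE compare
   sets the mask, so 2^31 is subtracted before the signed cvtt
   conversion (3000000000 - 2147483648 = 852516352), and the final
   xor with the mask shifted into bit 31 adds 2^31 back:
   852516352 ^ 0x80000000 = 3000000000.  Inputs below 2^31 pass
   through the conversion unchanged.  */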
16344 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16345 Expects the 64-bit DImode to be supplied in a pair of integral
16346 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16347 -mfpmath=sse, !optimize_size only. */
16350 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16352 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16353 rtx int_xmm, fp_xmm;
16354 rtx biases, exponents;
16357 int_xmm = gen_reg_rtx (V4SImode);
16358 if (TARGET_INTER_UNIT_MOVES)
16359 emit_insn (gen_movdi_to_sse (int_xmm, input));
16360 else if (TARGET_SSE_SPLIT_REGS)
16362 emit_clobber (int_xmm);
16363 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16367 x = gen_reg_rtx (V2DImode);
16368 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16369 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16372 x = gen_rtx_CONST_VECTOR (V4SImode,
16373 gen_rtvec (4, GEN_INT (0x43300000UL),
16374 GEN_INT (0x45300000UL),
16375 const0_rtx, const0_rtx));
16376 exponents = validize_mem (force_const_mem (V4SImode, x));
16378 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16379 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16381 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16382 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16383 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16384 (0x1.0p84 + double(fp_value_hi_xmm)).
16385 Note these exponents differ by 32. */
16387 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16389 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16390 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16391 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16392 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16393 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16394 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16395 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16396 biases = validize_mem (force_const_mem (V2DFmode, biases));
16397 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16399 /* Add the upper and lower DFmode values together. */
16401 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16404 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16405 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16406 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16409 ix86_expand_vector_extract (false, target, fp_xmm, 0);
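/* Worked arithmetic for the bias trick: with x = hi:lo,
   (0x43300000 ## lo) is the double 2^52 + lo and (0x45300000 ## hi)
   is the double 2^84 + hi*2^32.  Subtracting the biases 2^52 and
   2^84 leaves exactly lo and hi*2^32 in the two lanes, and the
   final add produces hi*2^32 + lo = x, rounding only once.  */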
16412 /* Not used, but eases macroization of patterns. */
16414 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16415 rtx input ATTRIBUTE_UNUSED)
16417 gcc_unreachable ();
16420 /* Convert an unsigned SImode value into a DFmode. Only currently used
16421 for SSE, but applicable anywhere. */
16424 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16426 REAL_VALUE_TYPE TWO31r;
16429 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16430 NULL, 1, OPTAB_DIRECT);
16432 fp = gen_reg_rtx (DFmode);
16433 emit_insn (gen_floatsidf2 (fp, x));
16435 real_ldexp (&TWO31r, &dconst1, 31);
16436 x = const_double_from_real_value (TWO31r, DFmode);
16438 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16440 emit_move_insn (target, x);
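/* Worked example: for x = 0xffffffff the PLUS of -2^31 wraps to the
   signed value 0x7fffffff, which floatsidf converts exactly to
   2147483647.0; adding the 2^31 constant back yields 4294967295.0.
   Both steps are exact in DFmode.  */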
16443 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16444 32-bit mode; otherwise we have a direct convert instruction. */
16447 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16449 REAL_VALUE_TYPE TWO32r;
16450 rtx fp_lo, fp_hi, x;
16452 fp_lo = gen_reg_rtx (DFmode);
16453 fp_hi = gen_reg_rtx (DFmode);
16455 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16457 real_ldexp (&TWO32r, &dconst1, 32);
16458 x = const_double_from_real_value (TWO32r, DFmode);
16459 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16461 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16463 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16466 emit_move_insn (target, x);
16469 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16470 For x86_32, -mfpmath=sse, !optimize_size only. */
16472 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16474 REAL_VALUE_TYPE ONE16r;
16475 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16477 real_ldexp (&ONE16r, &dconst1, 16);
16478 x = const_double_from_real_value (ONE16r, SFmode);
16479 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16480 NULL, 0, OPTAB_DIRECT);
16481 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16482 NULL, 0, OPTAB_DIRECT);
16483 fp_hi = gen_reg_rtx (SFmode);
16484 fp_lo = gen_reg_rtx (SFmode);
16485 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16486 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16487 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16489 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16491 if (!rtx_equal_p (target, fp_hi))
16492 emit_move_insn (target, fp_hi);
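/* Worked identity: x = hi*2^16 + lo with hi = x >> 16 and
   lo = x & 0xffff.  Both halves fit in 16 bits, so float (hi),
   float (lo) and the multiply by the 2^16 constant are all exact
   in SFmode; the only rounding happens in the final addition.  */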
16495 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16496 then replicate the value for all elements of the vector register. */
16500 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16507 v = gen_rtvec (4, value, value, value, value);
16508 return gen_rtx_CONST_VECTOR (V4SImode, v);
16512 v = gen_rtvec (2, value, value);
16513 return gen_rtx_CONST_VECTOR (V2DImode, v);
16517 v = gen_rtvec (8, value, value, value, value,
16518 value, value, value, value);
16520 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16521 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16522 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16523 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16524 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16528 v = gen_rtvec (4, value, value, value, value);
16530 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16531 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16532 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16536 v = gen_rtvec (4, value, value, value, value);
16538 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16539 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16540 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16544 v = gen_rtvec (2, value, value);
16546 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16547 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16550 gcc_unreachable ();
16554 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16555 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16556 for an SSE register. If VECT is true, then replicate the mask for
16557 all elements of the vector register. If INVERT is true, then create
16558 a mask excluding the sign bit. */
16561 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16563 enum machine_mode vec_mode, imode;
16564 HOST_WIDE_INT hi, lo;
16569 /* Find the sign bit, sign extended to 2*HWI. */
16576 mode = GET_MODE_INNER (mode);
16578 lo = 0x80000000, hi = lo < 0;
16585 mode = GET_MODE_INNER (mode);
16587 if (HOST_BITS_PER_WIDE_INT >= 64)
16588 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16590 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16595 vec_mode = VOIDmode;
16596 if (HOST_BITS_PER_WIDE_INT >= 64)
16599 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16606 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16610 lo = ~lo, hi = ~hi;
16616 mask = immed_double_const (lo, hi, imode);
16618 vec = gen_rtvec (2, v, mask);
16619 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16620 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16627 gcc_unreachable ();
16631 lo = ~lo, hi = ~hi;
16633 /* Force this value into the low part of a fp vector constant. */
16634 mask = immed_double_const (lo, hi, imode);
16635 mask = gen_lowpart (mode, mask);
16637 if (vec_mode == VOIDmode)
16638 return force_reg (mode, mask);
16640 v = ix86_build_const_vector (vec_mode, vect, mask);
16641 return force_reg (vec_mode, v);
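/* Example masks built here (scalar element values; assumed modes):
     SFmode: 0x80000000        DFmode: 0x8000000000000000
   With INVERT the complements are used instead (0x7fffffff, ...),
   i.e. masks that clear the sign bit rather than select it.  */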
16644 /* Generate code for floating point ABS or NEG. */
16647 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16650 rtx mask, set, dst, src;
16651 bool use_sse = false;
16652 bool vector_mode = VECTOR_MODE_P (mode);
16653 enum machine_mode vmode = mode;
16657 else if (mode == TFmode)
16659 else if (TARGET_SSE_MATH)
16661 use_sse = SSE_FLOAT_MODE_P (mode);
16662 if (mode == SFmode)
16664 else if (mode == DFmode)
16668 /* NEG and ABS performed with SSE use bitwise mask operations.
16669 Create the appropriate mask now. */
16671 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16678 set = gen_rtx_fmt_e (code, mode, src);
16679 set = gen_rtx_SET (VOIDmode, dst, set);
16686 use = gen_rtx_USE (VOIDmode, mask);
16688 par = gen_rtvec (2, set, use);
16691 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16692 par = gen_rtvec (3, set, use, clob);
16694 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
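/* Illustrative generated code for DFmode in an SSE register
   (assumed constant-pool labels):

     neg:  xorpd .LCsign(%rip), %xmm0      # flip the sign bit
     abs:  andpd .LCnotsign(%rip), %xmm0   # clear the sign bit

   which is why the mask built above excludes the sign bit exactly
   when CODE == ABS.  */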
16700 /* Expand a copysign operation. Special case operand 0 being a constant. */
16703 ix86_expand_copysign (rtx operands[])
16705 enum machine_mode mode, vmode;
16706 rtx dest, op0, op1, mask, nmask;
16708 dest = operands[0];
16712 mode = GET_MODE (dest);
16714 if (mode == SFmode)
16716 else if (mode == DFmode)
16721 if (GET_CODE (op0) == CONST_DOUBLE)
16723 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16725 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16726 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16728 if (mode == SFmode || mode == DFmode)
16730 if (op0 == CONST0_RTX (mode))
16731 op0 = CONST0_RTX (vmode);
16734 rtx v = ix86_build_const_vector (vmode, false, op0);
16736 op0 = force_reg (vmode, v);
16739 else if (op0 != CONST0_RTX (mode))
16740 op0 = force_reg (mode, op0);
16742 mask = ix86_build_signbit_mask (vmode, 0, 0);
16744 if (mode == SFmode)
16745 copysign_insn = gen_copysignsf3_const;
16746 else if (mode == DFmode)
16747 copysign_insn = gen_copysigndf3_const;
16749 copysign_insn = gen_copysigntf3_const;
16751 emit_insn (copysign_insn (dest, op0, op1, mask));
16755 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16757 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16758 mask = ix86_build_signbit_mask (vmode, 0, 0);
16760 if (mode == SFmode)
16761 copysign_insn = gen_copysignsf3_var;
16762 else if (mode == DFmode)
16763 copysign_insn = gen_copysigndf3_var;
16765 copysign_insn = gen_copysigntf3_var;
16767 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
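/* The identity behind both variants:
     copysign (x, y) = (x & ~signmask) | (y & signmask)
   e.g. copysign (-3.0, 2.0) clears the sign bit of -3.0 and ors in
   the (clear) sign bit of 2.0, giving 3.0.  When x is a constant,
   x & ~signmask is folded at compile time and only the second mask
   is applied at run time (the _const variant above).  */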
16771 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16772 be a constant, and so has already been expanded into a vector constant. */
16775 ix86_split_copysign_const (rtx operands[])
16777 enum machine_mode mode, vmode;
16778 rtx dest, op0, mask, x;
16780 dest = operands[0];
16782 mask = operands[3];
16784 mode = GET_MODE (dest);
16785 vmode = GET_MODE (mask);
16787 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16788 x = gen_rtx_AND (vmode, dest, mask);
16789 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16791 if (op0 != CONST0_RTX (vmode))
16793 x = gen_rtx_IOR (vmode, dest, op0);
16794 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16798 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16799 so we have to do two masks. */
16802 ix86_split_copysign_var (rtx operands[])
16804 enum machine_mode mode, vmode;
16805 rtx dest, scratch, op0, op1, mask, nmask, x;
16807 dest = operands[0];
16808 scratch = operands[1];
16811 nmask = operands[4];
16812 mask = operands[5];
16814 mode = GET_MODE (dest);
16815 vmode = GET_MODE (mask);
16817 if (rtx_equal_p (op0, op1))
16819 /* Shouldn't happen often (it's useless, obviously), but when it does
16820 we'd generate incorrect code if we continue below. */
16821 emit_move_insn (dest, op0);
16825 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16827 gcc_assert (REGNO (op1) == REGNO (scratch));
16829 x = gen_rtx_AND (vmode, scratch, mask);
16830 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16833 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16834 x = gen_rtx_NOT (vmode, dest);
16835 x = gen_rtx_AND (vmode, x, op0);
16836 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16840 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16842 x = gen_rtx_AND (vmode, scratch, mask);
16844 else /* alternative 2,4 */
16846 gcc_assert (REGNO (mask) == REGNO (scratch));
16847 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16848 x = gen_rtx_AND (vmode, scratch, op1);
16850 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16852 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16854 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16855 x = gen_rtx_AND (vmode, dest, nmask);
16857 else /* alternative 3,4 */
16859 gcc_assert (REGNO (nmask) == REGNO (dest));
16861 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16862 x = gen_rtx_AND (vmode, dest, op0);
16864 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16867 x = gen_rtx_IOR (vmode, dest, scratch);
16868 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16871 /* Return TRUE or FALSE depending on whether the first SET in INSN
16872 has source and destination with matching CC modes, and that the
16873 CC mode is at least as constrained as REQ_MODE. */
16876 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16879 enum machine_mode set_mode;
16881 set = PATTERN (insn);
16882 if (GET_CODE (set) == PARALLEL)
16883 set = XVECEXP (set, 0, 0);
16884 gcc_assert (GET_CODE (set) == SET);
16885 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16887 set_mode = GET_MODE (SET_DEST (set));
16891 if (req_mode != CCNOmode
16892 && (req_mode != CCmode
16893 || XEXP (SET_SRC (set), 1) != const0_rtx))
16897 if (req_mode == CCGCmode)
16901 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16905 if (req_mode == CCZmode)
16915 if (set_mode != req_mode)
16920 gcc_unreachable ();
16923 return GET_MODE (SET_SRC (set)) == set_mode;
16926 /* Generate insn patterns to do an integer compare of OPERANDS. */
16929 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16931 enum machine_mode cmpmode;
16934 cmpmode = SELECT_CC_MODE (code, op0, op1);
16935 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16937 /* This is very simple, but making the interface the same as in the
16938 FP case makes the rest of the code easier. */
16939 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16940 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16942 /* Return the test that should be put into the flags user, i.e.
16943 the bcc, scc, or cmov instruction. */
16944 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16947 /* Figure out whether to use ordered or unordered fp comparisons.
16948 Return the appropriate mode to use. */
16951 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16953 /* ??? In order to make all comparisons reversible, we do all comparisons
16954 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16955 all forms of trapping and nontrapping comparisons, we can make inequality
16956 comparisons trapping again, since it results in better code when using
16957 FCOM based compares. */
16958 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16962 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16964 enum machine_mode mode = GET_MODE (op0);
16966 if (SCALAR_FLOAT_MODE_P (mode))
16968 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16969 return ix86_fp_compare_mode (code);
16974 /* Only zero flag is needed. */
16975 case EQ: /* ZF=0 */
16976 case NE: /* ZF!=0 */
16978 /* Codes needing carry flag. */
16979 case GEU: /* CF=0 */
16980 case LTU: /* CF=1 */
16981 /* Detect overflow checks. They need just the carry flag. */
16982 if (GET_CODE (op0) == PLUS
16983 && rtx_equal_p (op1, XEXP (op0, 0)))
16987 case GTU: /* CF=0 & ZF=0 */
16988 case LEU: /* CF=1 | ZF=1 */
16989 /* Detect overflow checks. They need just the carry flag. */
16990 if (GET_CODE (op0) == MINUS
16991 && rtx_equal_p (op1, XEXP (op0, 0)))
16995 /* Codes possibly doable only with sign flag when
16996 comparing against zero. */
16997 case GE: /* SF=OF or SF=0 */
16998 case LT: /* SF<>OF or SF=1 */
16999 if (op1 == const0_rtx)
17002 /* For other cases the carry flag is not required. */
17004 /* Codes doable only with the sign flag when comparing
17005 against zero, but for which we lack a jump instruction,
17006 so we need to use relational tests against overflow,
17007 which thus needs to be zero. */
17008 case GT: /* ZF=0 & SF=OF */
17009 case LE: /* ZF=1 | SF<>OF */
17010 if (op1 == const0_rtx)
17014 /* strcmp patterns do (use flags) and combine may ask us for a proper mode. */
17019 gcc_unreachable ();
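/* Example of the overflow-check idiom recognized for GEU/LTU above
   (assumed source):

     sum = a + b;
     if (sum < a)   ... overflow ...

   Here op0 is (plus a b) and op1 is a, so only the carry flag is
   needed for the comparison.  */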
17023 /* Return the fixed registers used for condition codes. */
17026 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17033 /* If two condition code modes are compatible, return a condition code
17034 mode which is compatible with both. Otherwise, return
17037 static enum machine_mode
17038 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17043 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17046 if ((m1 == CCGCmode && m2 == CCGOCmode)
17047 || (m1 == CCGOCmode && m2 == CCGCmode))
17053 gcc_unreachable ();
17083 /* These are only compatible with themselves, which we already know. */
17090 /* Return a comparison we can do that is equivalent to
17091 swap_condition (code), except possibly for orderedness.
17092 But never change orderedness if TARGET_IEEE_FP, returning
17093 UNKNOWN in that case if necessary. */
17095 static enum rtx_code
17096 ix86_fp_swap_condition (enum rtx_code code)
17100 case GT: /* GTU - CF=0 & ZF=0 */
17101 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17102 case GE: /* GEU - CF=0 */
17103 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17104 case UNLT: /* LTU - CF=1 */
17105 return TARGET_IEEE_FP ? UNKNOWN : GT;
17106 case UNLE: /* LEU - CF=1 | ZF=1 */
17107 return TARGET_IEEE_FP ? UNKNOWN : GE;
17109 return swap_condition (code);
17113 /* Return cost of comparison CODE using the best strategy for performance.
17114 All following functions use the number of instructions as a cost metric.
17115 In the future this should be tweaked to compute bytes for optimize_size and
17116 to take into account the performance of various instructions on various CPUs. */
17119 ix86_fp_comparison_cost (enum rtx_code code)
17123 /* The cost of code using bit-twiddling on %ah. */
17140 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17144 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17147 gcc_unreachable ();
17150 switch (ix86_fp_comparison_strategy (code))
17152 case IX86_FPCMP_COMI:
17153 return arith_cost > 4 ? 3 : 2;
17154 case IX86_FPCMP_SAHF:
17155 return arith_cost > 4 ? 4 : 3;
17161 /* Return strategy to use for floating-point comparisons. We assume that fcomi
17162 is always preferable where available, since that is also true when looking at size
17163 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17165 enum ix86_fpcmp_strategy
17166 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17168 /* Do fcomi/sahf based test when profitable. */
17171 return IX86_FPCMP_COMI;
17173 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17174 return IX86_FPCMP_SAHF;
17176 return IX86_FPCMP_ARITH;
17179 /* Swap, force into registers, or otherwise massage the two operands
17180 to a fp comparison. The operands are updated in place; the new
17181 comparison code is returned. */
17183 static enum rtx_code
17184 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17186 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17187 rtx op0 = *pop0, op1 = *pop1;
17188 enum machine_mode op_mode = GET_MODE (op0);
17189 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17191 /* All of the unordered compare instructions only work on registers.
17192 The same is true of the fcomi compare instructions. The XFmode
17193 compare instructions require registers except when comparing
17194 against zero or when converting operand 1 from fixed point to floating point. */
17198 && (fpcmp_mode == CCFPUmode
17199 || (op_mode == XFmode
17200 && ! (standard_80387_constant_p (op0) == 1
17201 || standard_80387_constant_p (op1) == 1)
17202 && GET_CODE (op1) != FLOAT)
17203 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17205 op0 = force_reg (op_mode, op0);
17206 op1 = force_reg (op_mode, op1);
17210 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17211 things around if they appear profitable, otherwise force op0
17212 into a register. */
17214 if (standard_80387_constant_p (op0) == 0
17216 && ! (standard_80387_constant_p (op1) == 0
17219 enum rtx_code new_code = ix86_fp_swap_condition (code);
17220 if (new_code != UNKNOWN)
17223 tmp = op0, op0 = op1, op1 = tmp;
17229 op0 = force_reg (op_mode, op0);
17231 if (CONSTANT_P (op1))
17233 int tmp = standard_80387_constant_p (op1);
17235 op1 = validize_mem (force_const_mem (op_mode, op1));
17239 op1 = force_reg (op_mode, op1);
17242 op1 = force_reg (op_mode, op1);
17246 /* Try to rearrange the comparison to make it cheaper. */
17247 if (ix86_fp_comparison_cost (code)
17248 > ix86_fp_comparison_cost (swap_condition (code))
17249 && (REG_P (op1) || can_create_pseudo_p ()))
17252 tmp = op0, op0 = op1, op1 = tmp;
17253 code = swap_condition (code);
17255 op0 = force_reg (op_mode, op0);
17263 /* Convert comparison codes we use to represent FP comparison to integer
17264 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
17268 ix86_fp_compare_code_to_integer (enum rtx_code code)
17297 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17300 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17302 enum machine_mode fpcmp_mode, intcmp_mode;
17305 fpcmp_mode = ix86_fp_compare_mode (code);
17306 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17308 /* Do fcomi/sahf based test when profitable. */
17309 switch (ix86_fp_comparison_strategy (code))
17311 case IX86_FPCMP_COMI:
17312 intcmp_mode = fpcmp_mode;
17313 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17314 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17319 case IX86_FPCMP_SAHF:
17320 intcmp_mode = fpcmp_mode;
17321 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17322 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17326 scratch = gen_reg_rtx (HImode);
17327 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17328 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17331 case IX86_FPCMP_ARITH:
17332 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17333 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17334 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17336 scratch = gen_reg_rtx (HImode);
17337 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17339 /* In the unordered case, we have to check C2 for NaN's, which
17340 doesn't happen to work out to anything nice combination-wise.
17341 So do some bit twiddling on the value we've got in AH to come
17342 up with an appropriate set of condition codes. */
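/* Layout of the FPU condition bits once fnstsw copies the status
   word into %ax: in %ah, C0 is 0x01, C2 is 0x04 and C3 is 0x40, so
   0x45 masks all three.  An unordered result (NaN) sets C3, C2 and
   C0 at once, which is what the TARGET_IEEE_FP paths below must
   separate from the ordinary outcomes.  */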
17344 intcmp_mode = CCNOmode;
17349 if (code == GT || !TARGET_IEEE_FP)
17351 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17356 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17357 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17358 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17359 intcmp_mode = CCmode;
17365 if (code == LT && TARGET_IEEE_FP)
17367 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17368 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17369 intcmp_mode = CCmode;
17374 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17380 if (code == GE || !TARGET_IEEE_FP)
17382 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17387 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17388 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17394 if (code == LE && TARGET_IEEE_FP)
17396 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17397 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17398 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17399 intcmp_mode = CCmode;
17404 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17410 if (code == EQ && TARGET_IEEE_FP)
17412 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17413 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17414 intcmp_mode = CCmode;
17419 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17425 if (code == NE && TARGET_IEEE_FP)
17427 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17428 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17434 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17440 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17444 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17449 gcc_unreachable ();
17457 /* Return the test that should be put into the flags user, i.e.
17458 the bcc, scc, or cmov instruction. */
17459 return gen_rtx_fmt_ee (code, VOIDmode,
17460 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17465 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17469 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17470 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17472 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17474 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17475 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17478 ret = ix86_expand_int_compare (code, op0, op1);
17484 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17486 enum machine_mode mode = GET_MODE (op0);
17498 tmp = ix86_expand_compare (code, op0, op1);
17499 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17500 gen_rtx_LABEL_REF (VOIDmode, label),
17502 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17509 /* Expand DImode branch into multiple compare+branch. */
17511 rtx lo[2], hi[2], label2;
17512 enum rtx_code code1, code2, code3;
17513 enum machine_mode submode;
17515 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17517 tmp = op0, op0 = op1, op1 = tmp;
17518 code = swap_condition (code);
17521 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17522 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17524 submode = mode == DImode ? SImode : DImode;
17526 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17527 avoid two branches. This costs one extra insn, so disable when
17528 optimizing for size. */
17530 if ((code == EQ || code == NE)
17531 && (!optimize_insn_for_size_p ()
17532 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17537 if (hi[1] != const0_rtx)
17538 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17539 NULL_RTX, 0, OPTAB_WIDEN);
17542 if (lo[1] != const0_rtx)
17543 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17544 NULL_RTX, 0, OPTAB_WIDEN);
17546 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17547 NULL_RTX, 0, OPTAB_WIDEN);
17549 ix86_expand_branch (code, tmp, const0_rtx, label);
17553 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17554 op1 is a constant and the low word is zero, then we can just
17555 examine the high word. Similarly for low word -1 and
17556 less-or-equal-than or greater-than. */
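/* A concrete example (illustrative only): for the unsigned DImode test
   "a < 0x500000000", the low word of the constant is zero, so the test
   reduces to the single word compare "hi(a) < 5" (LTU), and the low
   word never needs to be examined.  */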
17558 if (CONST_INT_P (hi[1]))
17561 case LT: case LTU: case GE: case GEU:
17562 if (lo[1] == const0_rtx)
17564 ix86_expand_branch (code, hi[0], hi[1], label);
17568 case LE: case LEU: case GT: case GTU:
17569 if (lo[1] == constm1_rtx)
17571 ix86_expand_branch (code, hi[0], hi[1], label);
17579 /* Otherwise, we need two or three jumps. */
17581 label2 = gen_label_rtx ();
17584 code2 = swap_condition (code);
17585 code3 = unsigned_condition (code);
17589 case LT: case GT: case LTU: case GTU:
17592 case LE: code1 = LT; code2 = GT; break;
17593 case GE: code1 = GT; code2 = LT; break;
17594 case LEU: code1 = LTU; code2 = GTU; break;
17595 case GEU: code1 = GTU; code2 = LTU; break;
17597 case EQ: code1 = UNKNOWN; code2 = NE; break;
17598 case NE: code2 = UNKNOWN; break;
17601 gcc_unreachable ();
17606 * if (hi(a) < hi(b)) goto true;
17607 * if (hi(a) > hi(b)) goto false;
17608 * if (lo(a) < lo(b)) goto true;
17612 if (code1 != UNKNOWN)
17613 ix86_expand_branch (code1, hi[0], hi[1], label);
17614 if (code2 != UNKNOWN)
17615 ix86_expand_branch (code2, hi[0], hi[1], label2);
17617 ix86_expand_branch (code3, lo[0], lo[1], label);
17619 if (code2 != UNKNOWN)
17620 emit_label (label2);
17625 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17630 /* Split branch based on floating point condition. */
17632 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17633 rtx target1, rtx target2, rtx tmp, rtx pushed)
17638 if (target2 != pc_rtx)
17641 code = reverse_condition_maybe_unordered (code);
17646 condition = ix86_expand_fp_compare (code, op1, op2,
17649 /* Remove pushed operand from stack. */
17651 ix86_free_from_memory (GET_MODE (pushed));
17653 i = emit_jump_insn (gen_rtx_SET
17655 gen_rtx_IF_THEN_ELSE (VOIDmode,
17656 condition, target1, target2)));
17657 if (split_branch_probability >= 0)
17658 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17662 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17666 gcc_assert (GET_MODE (dest) == QImode);
17668 ret = ix86_expand_compare (code, op0, op1);
17669 PUT_MODE (ret, QImode);
17670 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17673 /* Expand a comparison that sets or clears the carry flag.  Return true when
17674 successful, and set *POP to the resulting comparison. */
17676 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17678 enum machine_mode mode =
17679 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17681 /* Do not handle double-mode compares that go through the special path. */
17682 if (mode == (TARGET_64BIT ? TImode : DImode))
17685 if (SCALAR_FLOAT_MODE_P (mode))
17687 rtx compare_op, compare_seq;
17689 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17691 /* Shortcut: the following common codes never translate
17692 into carry-flag compares. */
17693 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17694 || code == ORDERED || code == UNORDERED)
17697 /* These comparisons require the zero flag; swap the operands so they won't. */
17698 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17699 && !TARGET_IEEE_FP)
17704 code = swap_condition (code);
17707 /* Try to expand the comparison and verify that we end up with a
17708 carry-flag-based comparison.  This fails to be true only when
17709 we decide to expand the comparison using arithmetic, which is
17710 not a very common scenario. */
17712 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17713 compare_seq = get_insns ();
17716 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17717 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17718 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17720 code = GET_CODE (compare_op);
17722 if (code != LTU && code != GEU)
17725 emit_insn (compare_seq);
17730 if (!INTEGRAL_MODE_P (mode))
17739 /* Convert a==0 into (unsigned)a<1. */
17742 if (op1 != const0_rtx)
17745 code = (code == EQ ? LTU : GEU);
17748 /* Convert a>b into b<a or a>=b-1. */
17751 if (CONST_INT_P (op1))
17753 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17754 /* Bail out on overflow.  We still can swap the operands, but that
17755 would force loading of the constant into a register. */
17756 if (op1 == const0_rtx
17757 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17759 code = (code == GTU ? GEU : LTU);
17766 code = (code == GTU ? LTU : GEU);
17770 /* Convert a>=0 into (unsigned)a<0x80000000. */
17773 if (mode == DImode || op1 != const0_rtx)
17775 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17776 code = (code == LT ? GEU : LTU);
17780 if (mode == DImode || op1 != constm1_rtx)
17782 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17783 code = (code == LE ? GEU : LTU);
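/* Summary sketch of the conversions above (for exposition; not part of
   the original code):

       a == 0     becomes   (unsigned) a < 1               (LTU)
       a >= 0     becomes   (unsigned) a < 0x80000000      (LTU)
       a <= -1    becomes   (unsigned) a >= 0x80000000     (GEU)

   so every accepted comparison ends up as LTU or GEU, i.e. a pure
   carry-flag test.  */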
17789 /* Swapping operands may cause a constant to appear as the first operand. */
17790 if (!nonimmediate_operand (op0, VOIDmode))
17792 if (!can_create_pseudo_p ())
17794 op0 = force_reg (mode, op0);
17796 *pop = ix86_expand_compare (code, op0, op1);
17797 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17802 ix86_expand_int_movcc (rtx operands[])
17804 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17805 rtx compare_seq, compare_op;
17806 enum machine_mode mode = GET_MODE (operands[0]);
17807 bool sign_bit_compare_p = false;
17808 rtx op0 = XEXP (operands[1], 0);
17809 rtx op1 = XEXP (operands[1], 1);
17812 compare_op = ix86_expand_compare (code, op0, op1);
17813 compare_seq = get_insns ();
17816 compare_code = GET_CODE (compare_op);
17818 if ((op1 == const0_rtx && (code == GE || code == LT))
17819 || (op1 == constm1_rtx && (code == GT || code == LE)))
17820 sign_bit_compare_p = true;
17822 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17823 HImode insns, we'd be swallowed in word prefix ops. */
17825 if ((mode != HImode || TARGET_FAST_PREFIX)
17826 && (mode != (TARGET_64BIT ? TImode : DImode))
17827 && CONST_INT_P (operands[2])
17828 && CONST_INT_P (operands[3]))
17830 rtx out = operands[0];
17831 HOST_WIDE_INT ct = INTVAL (operands[2]);
17832 HOST_WIDE_INT cf = INTVAL (operands[3]);
17833 HOST_WIDE_INT diff;
17836 /* Sign bit compares are better done using shifts than by using sbb. */
17838 if (sign_bit_compare_p
17839 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17841 /* Detect overlap between destination and compare sources. */
17844 if (!sign_bit_compare_p)
17847 bool fpcmp = false;
17849 compare_code = GET_CODE (compare_op);
17851 flags = XEXP (compare_op, 0);
17853 if (GET_MODE (flags) == CCFPmode
17854 || GET_MODE (flags) == CCFPUmode)
17858 = ix86_fp_compare_code_to_integer (compare_code);
17861 /* To simplify the rest of the code, restrict to the GEU case. */
17862 if (compare_code == LTU)
17864 HOST_WIDE_INT tmp = ct;
17867 compare_code = reverse_condition (compare_code);
17868 code = reverse_condition (code);
17873 PUT_CODE (compare_op,
17874 reverse_condition_maybe_unordered
17875 (GET_CODE (compare_op)));
17877 PUT_CODE (compare_op,
17878 reverse_condition (GET_CODE (compare_op)));
17882 if (reg_overlap_mentioned_p (out, op0)
17883 || reg_overlap_mentioned_p (out, op1))
17884 tmp = gen_reg_rtx (mode);
17886 if (mode == DImode)
17887 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17889 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17890 flags, compare_op));
17894 if (code == GT || code == GE)
17895 code = reverse_condition (code);
17898 HOST_WIDE_INT tmp = ct;
17903 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17916 tmp = expand_simple_binop (mode, PLUS,
17918 copy_rtx (tmp), 1, OPTAB_DIRECT);
17929 tmp = expand_simple_binop (mode, IOR,
17931 copy_rtx (tmp), 1, OPTAB_DIRECT);
17933 else if (diff == -1 && ct)
17943 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17945 tmp = expand_simple_binop (mode, PLUS,
17946 copy_rtx (tmp), GEN_INT (cf),
17947 copy_rtx (tmp), 1, OPTAB_DIRECT);
17955 * andl cf - ct, dest
17965 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17968 tmp = expand_simple_binop (mode, AND,
17970 gen_int_mode (cf - ct, mode),
17971 copy_rtx (tmp), 1, OPTAB_DIRECT);
17973 tmp = expand_simple_binop (mode, PLUS,
17974 copy_rtx (tmp), GEN_INT (ct),
17975 copy_rtx (tmp), 1, OPTAB_DIRECT);
17978 if (!rtx_equal_p (tmp, out))
17979 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17986 enum machine_mode cmp_mode = GET_MODE (op0);
17989 tmp = ct, ct = cf, cf = tmp;
17992 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17994 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17996 /* We may be reversing an unordered compare to a normal compare, which
17997 is not valid in general (we may convert a non-trapping condition
17998 into a trapping one); however, on i386 we currently emit all
17999 comparisons unordered. */
18000 compare_code = reverse_condition_maybe_unordered (compare_code);
18001 code = reverse_condition_maybe_unordered (code);
18005 compare_code = reverse_condition (compare_code);
18006 code = reverse_condition (code);
18010 compare_code = UNKNOWN;
18011 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18012 && CONST_INT_P (op1))
18014 if (op1 == const0_rtx
18015 && (code == LT || code == GE))
18016 compare_code = code;
18017 else if (op1 == constm1_rtx)
18021 else if (code == GT)
18026 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18027 if (compare_code != UNKNOWN
18028 && GET_MODE (op0) == GET_MODE (out)
18029 && (cf == -1 || ct == -1))
18031 /* If the lea code below could be used, only optimize
18032 if it results in a 2-insn sequence. */
18034 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18035 || diff == 3 || diff == 5 || diff == 9)
18036 || (compare_code == LT && ct == -1)
18037 || (compare_code == GE && cf == -1))
18040 * notl op1 (if necessary)
18048 code = reverse_condition (code);
18051 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18053 out = expand_simple_binop (mode, IOR,
18055 out, 1, OPTAB_DIRECT);
18056 if (out != operands[0])
18057 emit_move_insn (operands[0], out);
18064 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18065 || diff == 3 || diff == 5 || diff == 9)
18066 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18068 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18074 * lea cf(dest*(ct-cf)),dest
18078 * This also catches the degenerate setcc-only case.
18084 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18087 /* On x86_64 the lea instruction operates on Pmode, so we need
18088 to get the arithmetic done in the proper mode to match. */
18090 tmp = copy_rtx (out);
18094 out1 = copy_rtx (out);
18095 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18099 tmp = gen_rtx_PLUS (mode, tmp, out1);
18105 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18108 if (!rtx_equal_p (tmp, out))
18111 out = force_operand (tmp, copy_rtx (out));
18113 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18115 if (!rtx_equal_p (out, operands[0]))
18116 emit_move_insn (operands[0], copy_rtx (out));
18122 * General case: Jumpful:
18123 * xorl dest,dest cmpl op1, op2
18124 * cmpl op1, op2 movl ct, dest
18125 * setcc dest jcc 1f
18126 * decl dest movl cf, dest
18127 * andl (cf-ct),dest 1:
18130 * Size 20. Size 14.
18132 * This is reasonably steep, but branch mispredict costs are
18133 * high on modern CPUs, so consider failing only if optimizing for size. */
18137 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18138 && BRANCH_COST (optimize_insn_for_speed_p (),
18143 enum machine_mode cmp_mode = GET_MODE (op0);
18148 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18150 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18152 /* We may be reversing unordered compare to normal compare,
18153 that is not valid in general (we may convert non-trapping
18154 condition to trapping one), however on i386 we currently
18155 emit all comparisons unordered. */
18156 code = reverse_condition_maybe_unordered (code);
18160 code = reverse_condition (code);
18161 if (compare_code != UNKNOWN)
18162 compare_code = reverse_condition (compare_code);
18166 if (compare_code != UNKNOWN)
18168 /* notl op1 (if needed)
18173 For x < 0 (resp. x <= -1) there will be no notl,
18174 so if possible swap the constants to get rid of the complement.
18176 True/false will be -1/0 while code below (store flag
18177 followed by decrement) is 0/-1, so the constants need
18178 to be exchanged once more. */
18180 if (compare_code == GE || !cf)
18182 code = reverse_condition (code);
18187 HOST_WIDE_INT tmp = cf;
18192 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18196 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18198 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18200 copy_rtx (out), 1, OPTAB_DIRECT);
18203 out = expand_simple_binop (mode, AND, copy_rtx (out),
18204 gen_int_mode (cf - ct, mode),
18205 copy_rtx (out), 1, OPTAB_DIRECT);
18207 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18208 copy_rtx (out), 1, OPTAB_DIRECT);
18209 if (!rtx_equal_p (out, operands[0]))
18210 emit_move_insn (operands[0], copy_rtx (out));
18216 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18218 /* Try a few things more with specific constants and a variable. */
18221 rtx var, orig_out, out, tmp;
18223 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18226 /* If one of the two operands is an interesting constant, load a
18227 constant with the above and mask it in with a logical operation. */
18229 if (CONST_INT_P (operands[2]))
18232 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18233 operands[3] = constm1_rtx, op = and_optab;
18234 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18235 operands[3] = const0_rtx, op = ior_optab;
18239 else if (CONST_INT_P (operands[3]))
18242 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18243 operands[2] = constm1_rtx, op = and_optab;
18244 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18245 operands[2] = const0_rtx, op = ior_optab;
18252 orig_out = operands[0];
18253 tmp = gen_reg_rtx (mode);
18256 /* Recurse to get the constant loaded. */
18257 if (ix86_expand_int_movcc (operands) == 0)
18260 /* Mask in the interesting variable. */
18261 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18263 if (!rtx_equal_p (out, orig_out))
18264 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18270 * For comparison with above,
18280 if (! nonimmediate_operand (operands[2], mode))
18281 operands[2] = force_reg (mode, operands[2]);
18282 if (! nonimmediate_operand (operands[3], mode))
18283 operands[3] = force_reg (mode, operands[3]);
18285 if (! register_operand (operands[2], VOIDmode)
18287 || ! register_operand (operands[3], VOIDmode)))
18288 operands[2] = force_reg (mode, operands[2]);
18291 && ! register_operand (operands[3], VOIDmode))
18292 operands[3] = force_reg (mode, operands[3]);
18294 emit_insn (compare_seq);
18295 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18296 gen_rtx_IF_THEN_ELSE (mode,
18297 compare_op, operands[2],
18302 /* Swap, force into registers, or otherwise massage the two operands
18303 to an sse comparison with a mask result. Thus we differ a bit from
18304 ix86_prepare_fp_compare_args which expects to produce a flags result.
18306 The DEST operand exists to help determine whether to commute commutative
18307 operators. The POP0/POP1 operands are updated in place. The new
18308 comparison code is returned, or UNKNOWN if not implementable. */
18310 static enum rtx_code
18311 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18312 rtx *pop0, rtx *pop1)
18320 /* We have no LTGT as an operator. We could implement it with
18321 NE & ORDERED, but this requires an extra temporary. It's
18322 not clear that it's worth it. */
18329 /* These are supported directly. */
18336 /* For commutative operators, try to canonicalize the destination
18337 operand to be first in the comparison - this helps reload to
18338 avoid extra moves. */
18339 if (!dest || !rtx_equal_p (dest, *pop1))
18347 /* These are not supported directly. Swap the comparison operands
18348 to transform into something that is supported. */
18352 code = swap_condition (code);
18356 gcc_unreachable ();
18362 /* Detect conditional moves that exactly match min/max operational
18363 semantics. Note that this is IEEE safe, as long as we don't
18364 interchange the operands.
18366 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18367 and TRUE if the operation is successful and instructions are emitted. */
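/* Background note (an assumption about SSE semantics, added for
   exposition): minss/minps and maxss/maxps return the second source
   operand when the inputs are unordered (NaN) or both zero, i.e. roughly

       minss %xmm1, %xmm0     # xmm0 = xmm0 < xmm1 ? xmm0 : xmm1

   so interchanging IF_TRUE/IF_FALSE would change the NaN and
   signed-zero behaviour.  */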
18370 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18371 rtx cmp_op1, rtx if_true, rtx if_false)
18373 enum machine_mode mode;
18379 else if (code == UNGE)
18382 if_true = if_false;
18388 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18390 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18395 mode = GET_MODE (dest);
18397 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18398 but MODE may be a vector mode and thus not appropriate. */
18399 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18401 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18404 if_true = force_reg (mode, if_true);
18405 v = gen_rtvec (2, if_true, if_false);
18406 tmp = gen_rtx_UNSPEC (mode, v, u);
18410 code = is_min ? SMIN : SMAX;
18411 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18414 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18418 /* Expand an sse vector comparison. Return the register with the result. */
18421 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18422 rtx op_true, rtx op_false)
18424 enum machine_mode mode = GET_MODE (dest);
18427 cmp_op0 = force_reg (mode, cmp_op0);
18428 if (!nonimmediate_operand (cmp_op1, mode))
18429 cmp_op1 = force_reg (mode, cmp_op1);
18432 || reg_overlap_mentioned_p (dest, op_true)
18433 || reg_overlap_mentioned_p (dest, op_false))
18434 dest = gen_reg_rtx (mode);
18436 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18437 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18442 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18443 operations. This is used for both scalar and vector conditional moves. */
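/* Illustration (not from the original sources): when CMP is a full-width
   per-element mask of all ones or all zeros, the conditional move is the
   bitwise select

       dest = (cmp & op_true) | (~cmp & op_false)

   which is what the AND / AND-NOT / IOR sequence below computes when no
   single-instruction select (such as XOP's pcmov) is available.  */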
18446 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18448 enum machine_mode mode = GET_MODE (dest);
18451 if (op_false == CONST0_RTX (mode))
18453 op_true = force_reg (mode, op_true);
18454 x = gen_rtx_AND (mode, cmp, op_true);
18455 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18457 else if (op_true == CONST0_RTX (mode))
18459 op_false = force_reg (mode, op_false);
18460 x = gen_rtx_NOT (mode, cmp);
18461 x = gen_rtx_AND (mode, x, op_false);
18462 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18464 else if (TARGET_XOP)
18466 rtx pcmov = gen_rtx_SET (mode, dest,
18467 gen_rtx_IF_THEN_ELSE (mode, cmp,
18474 op_true = force_reg (mode, op_true);
18475 op_false = force_reg (mode, op_false);
18477 t2 = gen_reg_rtx (mode);
18479 t3 = gen_reg_rtx (mode);
18483 x = gen_rtx_AND (mode, op_true, cmp);
18484 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18486 x = gen_rtx_NOT (mode, cmp);
18487 x = gen_rtx_AND (mode, x, op_false);
18488 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18490 x = gen_rtx_IOR (mode, t3, t2);
18491 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18495 /* Expand a floating-point conditional move. Return true if successful. */
18498 ix86_expand_fp_movcc (rtx operands[])
18500 enum machine_mode mode = GET_MODE (operands[0]);
18501 enum rtx_code code = GET_CODE (operands[1]);
18502 rtx tmp, compare_op;
18503 rtx op0 = XEXP (operands[1], 0);
18504 rtx op1 = XEXP (operands[1], 1);
18506 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18508 enum machine_mode cmode;
18510 /* Since we have no cmove for SSE registers, don't force bad register
18511 allocation just to gain access to it.  Deny movcc when the
18512 comparison mode doesn't match the move mode. */
18513 cmode = GET_MODE (op0);
18514 if (cmode == VOIDmode)
18515 cmode = GET_MODE (op1);
18519 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18520 if (code == UNKNOWN)
18523 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18524 operands[2], operands[3]))
18527 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18528 operands[2], operands[3]);
18529 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18533 /* The floating point conditional move instructions don't directly
18534 support conditions resulting from a signed integer comparison. */
18536 compare_op = ix86_expand_compare (code, op0, op1);
18537 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18539 tmp = gen_reg_rtx (QImode);
18540 ix86_expand_setcc (tmp, code, op0, op1);
18542 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18545 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18546 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18547 operands[2], operands[3])));
18552 /* Expand a floating-point vector conditional move; a vcond operation
18553 rather than a movcc operation. */
18556 ix86_expand_fp_vcond (rtx operands[])
18558 enum rtx_code code = GET_CODE (operands[3]);
18561 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18562 &operands[4], &operands[5]);
18563 if (code == UNKNOWN)
18566 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18567 operands[5], operands[1], operands[2]))
18570 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18571 operands[1], operands[2]);
18572 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18576 /* Expand a signed/unsigned integral vector conditional move. */
18579 ix86_expand_int_vcond (rtx operands[])
18581 enum machine_mode mode = GET_MODE (operands[0]);
18582 enum rtx_code code = GET_CODE (operands[3]);
18583 bool negate = false;
18586 cop0 = operands[4];
18587 cop1 = operands[5];
18589 /* XOP supports all of the comparisons on all vector int types. */
18592 /* Canonicalize the comparison to EQ, GT, GTU. */
18603 code = reverse_condition (code);
18609 code = reverse_condition (code);
18615 code = swap_condition (code);
18616 x = cop0, cop0 = cop1, cop1 = x;
18620 gcc_unreachable ();
18623 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18624 if (mode == V2DImode)
18629 /* SSE4.1 supports EQ. */
18630 if (!TARGET_SSE4_1)
18636 /* SSE4.2 supports GT/GTU. */
18637 if (!TARGET_SSE4_2)
18642 gcc_unreachable ();
18646 /* Unsigned parallel compare is not supported by the hardware.
18647 Play some tricks to turn this into a signed comparison against 0. */
18651 cop0 = force_reg (mode, cop0);
18659 rtx (*gen_sub3) (rtx, rtx, rtx);
18661 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
18663 mask = ix86_build_signbit_mask (mode, true, false);
18664 gen_sub3 = (mode == V4SImode
18665 ? gen_subv4si3 : gen_subv2di3);
18666 t1 = gen_reg_rtx (mode);
18667 emit_insn (gen_sub3 (t1, cop0, mask));
18669 t2 = gen_reg_rtx (mode);
18670 emit_insn (gen_sub3 (t2, cop1, mask));
18680 /* Perform a parallel unsigned saturating subtraction. */
18681 x = gen_reg_rtx (mode);
18682 emit_insn (gen_rtx_SET (VOIDmode, x,
18683 gen_rtx_US_MINUS (mode, cop0, cop1)));
18686 cop1 = CONST0_RTX (mode);
18692 gcc_unreachable ();
18697 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18698 operands[1+negate], operands[2-negate]);
18700 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18701 operands[2-negate]);
18705 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18706 true if we should do zero extension, else sign extension. HIGH_P is
18707 true if we want the N/2 high elements, else the low elements. */
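/* Sketch of the pre-SSE4.1 signed case (illustrative; the register
   choices are hypothetical): for V8HImode the sign mask is built with a
   compare and then interleaved with the source,

       pcmpgtw   %xmm0, %xmm1    # xmm1 = (0 > x) per element: sign mask
       punpcklwd %xmm1, %xmm0    # pair each low word with its sign word

   which sign-extends the low four HImode elements to SImode.  */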
18710 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18712 enum machine_mode imode = GET_MODE (operands[1]);
18717 rtx (*unpack)(rtx, rtx);
18723 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18725 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18729 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18731 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18735 unpack = gen_sse4_1_zero_extendv2siv2di2;
18737 unpack = gen_sse4_1_sign_extendv2siv2di2;
18740 gcc_unreachable ();
18745 /* Shift the higher 8 bytes to the lower 8 bytes. */
18746 tmp = gen_reg_rtx (imode);
18747 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
18748 gen_lowpart (V1TImode, operands[1]),
18754 emit_insn (unpack (operands[0], tmp));
18758 rtx (*unpack)(rtx, rtx, rtx);
18764 unpack = gen_vec_interleave_highv16qi;
18766 unpack = gen_vec_interleave_lowv16qi;
18770 unpack = gen_vec_interleave_highv8hi;
18772 unpack = gen_vec_interleave_lowv8hi;
18776 unpack = gen_vec_interleave_highv4si;
18778 unpack = gen_vec_interleave_lowv4si;
18781 gcc_unreachable ();
18784 dest = gen_lowpart (imode, operands[0]);
18787 tmp = force_reg (imode, CONST0_RTX (imode));
18789 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18790 operands[1], pc_rtx, pc_rtx);
18792 emit_insn (unpack (dest, operands[1], tmp));
18796 /* Expand conditional increment or decrement using adc/sbb instructions.
18797 The default case using setcc followed by the conditional move can be
18798 done by generic code. */
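/* A minimal sketch of the idea (illustrative; AT&T syntax and register
   names are assumed): "x += (unsigned) (a < b)" can be emitted as

       cmpl  %esi, %edi      # sets CF when a < b (unsigned)
       adcl  $0, %ecx        # x += CF

   with no setcc and no conditional move.  */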
18800 ix86_expand_int_addcc (rtx operands[])
18802 enum rtx_code code = GET_CODE (operands[1]);
18804 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18806 rtx val = const0_rtx;
18807 bool fpcmp = false;
18808 enum machine_mode mode;
18809 rtx op0 = XEXP (operands[1], 0);
18810 rtx op1 = XEXP (operands[1], 1);
18812 if (operands[3] != const1_rtx
18813 && operands[3] != constm1_rtx)
18815 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18817 code = GET_CODE (compare_op);
18819 flags = XEXP (compare_op, 0);
18821 if (GET_MODE (flags) == CCFPmode
18822 || GET_MODE (flags) == CCFPUmode)
18825 code = ix86_fp_compare_code_to_integer (code);
18832 PUT_CODE (compare_op,
18833 reverse_condition_maybe_unordered
18834 (GET_CODE (compare_op)));
18836 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18839 mode = GET_MODE (operands[0]);
18841 /* Construct either adc or sbb insn. */
18842 if ((code == LTU) == (operands[3] == constm1_rtx))
18847 insn = gen_subqi3_carry;
18850 insn = gen_subhi3_carry;
18853 insn = gen_subsi3_carry;
18856 insn = gen_subdi3_carry;
18859 gcc_unreachable ();
18867 insn = gen_addqi3_carry;
18870 insn = gen_addhi3_carry;
18873 insn = gen_addsi3_carry;
18876 insn = gen_adddi3_carry;
18879 gcc_unreachable ();
18882 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18888 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
18889 but works for floating point parameters and non-offsettable memories.
18890 For pushes, it returns just stack offsets; the values will be saved
18891 in the right order.  Maximally four parts are generated. */
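/* For example (illustrative): on ia32 an XFmode value splits into three
   SImode parts at byte offsets 0, 4 and 8, while a DFmode value splits
   into two SImode parts at offsets 0 and 4.  */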
18894 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18899 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18901 size = (GET_MODE_SIZE (mode) + 4) / 8;
18903 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18904 gcc_assert (size >= 2 && size <= 4);
18906 /* Optimize constant pool reference to immediates.  This is used by fp
18907 moves that force all constants to memory to allow combining. */
18908 if (MEM_P (operand) && MEM_READONLY_P (operand))
18910 rtx tmp = maybe_get_pool_constant (operand);
18915 if (MEM_P (operand) && !offsettable_memref_p (operand))
18917 /* The only non-offsettable memories we handle are pushes. */
18918 int ok = push_operand (operand, VOIDmode);
18922 operand = copy_rtx (operand);
18923 PUT_MODE (operand, Pmode);
18924 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18928 if (GET_CODE (operand) == CONST_VECTOR)
18930 enum machine_mode imode = int_mode_for_mode (mode);
18931 /* Caution: if we looked through a constant pool memory above,
18932 the operand may actually have a different mode now. That's
18933 ok, since we want to pun this all the way back to an integer. */
18934 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18935 gcc_assert (operand != NULL);
18941 if (mode == DImode)
18942 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18947 if (REG_P (operand))
18949 gcc_assert (reload_completed);
18950 for (i = 0; i < size; i++)
18951 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18953 else if (offsettable_memref_p (operand))
18955 operand = adjust_address (operand, SImode, 0);
18956 parts[0] = operand;
18957 for (i = 1; i < size; i++)
18958 parts[i] = adjust_address (operand, SImode, 4 * i);
18960 else if (GET_CODE (operand) == CONST_DOUBLE)
18965 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18969 real_to_target (l, &r, mode);
18970 parts[3] = gen_int_mode (l[3], SImode);
18971 parts[2] = gen_int_mode (l[2], SImode);
18974 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18975 parts[2] = gen_int_mode (l[2], SImode);
18978 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18981 gcc_unreachable ();
18983 parts[1] = gen_int_mode (l[1], SImode);
18984 parts[0] = gen_int_mode (l[0], SImode);
18987 gcc_unreachable ();
18992 if (mode == TImode)
18993 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18994 if (mode == XFmode || mode == TFmode)
18996 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
18997 if (REG_P (operand))
18999 gcc_assert (reload_completed);
19000 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19001 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19003 else if (offsettable_memref_p (operand))
19005 operand = adjust_address (operand, DImode, 0);
19006 parts[0] = operand;
19007 parts[1] = adjust_address (operand, upper_mode, 8);
19009 else if (GET_CODE (operand) == CONST_DOUBLE)
19014 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19015 real_to_target (l, &r, mode);
19017 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
19018 if (HOST_BITS_PER_WIDE_INT >= 64)
19021 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19022 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19025 parts[0] = immed_double_const (l[0], l[1], DImode);
19027 if (upper_mode == SImode)
19028 parts[1] = gen_int_mode (l[2], SImode);
19029 else if (HOST_BITS_PER_WIDE_INT >= 64)
19032 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19033 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19036 parts[1] = immed_double_const (l[2], l[3], DImode);
19039 gcc_unreachable ();
19046 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19047 Return false when normal moves are needed; true when all required
19048 insns have been emitted.  Operands 2-4 contain the input values
19049 in the correct order; operands 5-7 contain the output values. */
19052 ix86_split_long_move (rtx operands[])
19057 int collisions = 0;
19058 enum machine_mode mode = GET_MODE (operands[0]);
19059 bool collisionparts[4];
19061 /* The DFmode expanders may ask us to move a double.
19062 For a 64-bit target this is a single move.  By hiding the fact
19063 here we simplify the i386.md splitters. */
19064 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19066 /* Optimize constant pool reference to immediates.  This is used by
19067 fp moves that force all constants to memory to allow combining. */
19069 if (MEM_P (operands[1])
19070 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19071 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19072 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19073 if (push_operand (operands[0], VOIDmode))
19075 operands[0] = copy_rtx (operands[0]);
19076 PUT_MODE (operands[0], Pmode);
19079 operands[0] = gen_lowpart (DImode, operands[0]);
19080 operands[1] = gen_lowpart (DImode, operands[1]);
19081 emit_move_insn (operands[0], operands[1]);
19085 /* The only non-offsettable memory we handle is a push. */
19086 if (push_operand (operands[0], VOIDmode))
19089 gcc_assert (!MEM_P (operands[0])
19090 || offsettable_memref_p (operands[0]));
19092 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19093 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19095 /* When emitting a push, take care of source operands on the stack. */
19096 if (push && MEM_P (operands[1])
19097 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19099 rtx src_base = XEXP (part[1][nparts - 1], 0);
19101 /* Compensate for the stack decrement by 4. */
19102 if (!TARGET_64BIT && nparts == 3
19103 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19104 src_base = plus_constant (src_base, 4);
19106 /* src_base refers to the stack pointer and is
19107 automatically decreased by the emitted pushes. */
19108 for (i = 0; i < nparts; i++)
19109 part[1][i] = change_address (part[1][i],
19110 GET_MODE (part[1][i]), src_base);
19113 /* We need to do the copies in the right order in case an address register
19114 of the source overlaps the destination. */
19115 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19119 for (i = 0; i < nparts; i++)
19122 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19123 if (collisionparts[i])
19127 /* A collision in the middle part can be handled by reordering. */
19128 if (collisions == 1 && nparts == 3 && collisionparts [1])
19130 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19131 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19133 else if (collisions == 1
19135 && (collisionparts [1] || collisionparts [2]))
19137 if (collisionparts [1])
19139 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19140 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19144 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19145 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19149 /* If there are more collisions, we can't handle them by reordering.
19150 Do an lea into the last part and use only one colliding move. */
19151 else if (collisions > 1)
19157 base = part[0][nparts - 1];
19159 /* Handle the case when the last part isn't valid for lea.
19160 This happens in 64-bit mode when storing the 12-byte XFmode value. */
19161 if (GET_MODE (base) != Pmode)
19162 base = gen_rtx_REG (Pmode, REGNO (base));
19164 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19165 part[1][0] = replace_equiv_address (part[1][0], base);
19166 for (i = 1; i < nparts; i++)
19168 tmp = plus_constant (base, UNITS_PER_WORD * i);
19169 part[1][i] = replace_equiv_address (part[1][i], tmp);
19180 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19181 emit_insn (gen_addsi3 (stack_pointer_rtx,
19182 stack_pointer_rtx, GEN_INT (-4)));
19183 emit_move_insn (part[0][2], part[1][2]);
19185 else if (nparts == 4)
19187 emit_move_insn (part[0][3], part[1][3]);
19188 emit_move_insn (part[0][2], part[1][2]);
19193 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
19194 register, that is OK; we will just use the larger counterpart.  We also
19195 retype the memory; this comes from an attempt to avoid the REX prefix
19196 on moving the second half of a TFmode value. */
19197 if (GET_MODE (part[1][1]) == SImode)
19199 switch (GET_CODE (part[1][1]))
19202 part[1][1] = adjust_address (part[1][1], DImode, 0);
19206 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19210 gcc_unreachable ();
19213 if (GET_MODE (part[1][0]) == SImode)
19214 part[1][0] = part[1][1];
19217 emit_move_insn (part[0][1], part[1][1]);
19218 emit_move_insn (part[0][0], part[1][0]);
19222 /* Choose the correct order so as not to overwrite the source before it is copied. */
19223 if ((REG_P (part[0][0])
19224 && REG_P (part[1][1])
19225 && (REGNO (part[0][0]) == REGNO (part[1][1])
19227 && REGNO (part[0][0]) == REGNO (part[1][2]))
19229 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19231 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19233 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19235 operands[2 + i] = part[0][j];
19236 operands[6 + i] = part[1][j];
19241 for (i = 0; i < nparts; i++)
19243 operands[2 + i] = part[0][i];
19244 operands[6 + i] = part[1][i];
19248 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19249 if (optimize_insn_for_size_p ())
19251 for (j = 0; j < nparts - 1; j++)
19252 if (CONST_INT_P (operands[6 + j])
19253 && operands[6 + j] != const0_rtx
19254 && REG_P (operands[2 + j]))
19255 for (i = j; i < nparts - 1; i++)
19256 if (CONST_INT_P (operands[7 + i])
19257 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19258 operands[7 + i] = operands[2 + j];
19261 for (i = 0; i < nparts; i++)
19262 emit_move_insn (operands[2 + i], operands[6 + i]);
19267 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19268 left shift by a constant, either using a single shift or
19269 a sequence of add instructions. */
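/* Sketch (illustrative): when COUNT adds are cheaper than one shift by
   COUNT, "x <<= 2" comes out as

       addl %eax, %eax
       addl %eax, %eax

   instead of "shll $2, %eax"; the register choice is hypothetical.  */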
19272 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19274 rtx (*insn)(rtx, rtx, rtx);
19277 || (count * ix86_cost->add <= ix86_cost->shift_const
19278 && !optimize_insn_for_size_p ()))
19280 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19281 while (count-- > 0)
19282 emit_insn (insn (operand, operand, operand));
19286 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19287 emit_insn (insn (operand, operand, GEN_INT (count)));
19292 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19294 rtx (*gen_ashl3)(rtx, rtx, rtx);
19295 rtx (*gen_shld)(rtx, rtx, rtx);
19296 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19298 rtx low[2], high[2];
19301 if (CONST_INT_P (operands[2]))
19303 split_double_mode (mode, operands, 2, low, high);
19304 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19306 if (count >= half_width)
19308 emit_move_insn (high[0], low[1]);
19309 emit_move_insn (low[0], const0_rtx);
19311 if (count > half_width)
19312 ix86_expand_ashl_const (high[0], count - half_width, mode);
19316 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19318 if (!rtx_equal_p (operands[0], operands[1]))
19319 emit_move_insn (operands[0], operands[1]);
19321 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19322 ix86_expand_ashl_const (low[0], count, mode);
19327 split_double_mode (mode, operands, 1, low, high);
19329 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19331 if (operands[1] == const1_rtx)
19333 /* Assuming we've chosen QImode-capable registers, 1 << N
19334 can be done with two 32/64-bit shifts, no branches, no cmoves. */
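/* Rough shape of the emitted code for DImode "1 << n" on ia32
   (illustrative only):

       xorl  lo, lo
       xorl  hi, hi
       testb $32, %cl          # is bit 5 of the count set?
       sete  lo_b              # lo = (n & 32) == 0
       setne hi_b              # hi = (n & 32) != 0
       shll  %cl, lo           # hardware masks the count to 0..31
       shll  %cl, hi

   leaving the single set bit in the correct half.  */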
19335 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19337 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19339 ix86_expand_clear (low[0]);
19340 ix86_expand_clear (high[0]);
19341 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19343 d = gen_lowpart (QImode, low[0]);
19344 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19345 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19346 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19348 d = gen_lowpart (QImode, high[0]);
19349 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19350 s = gen_rtx_NE (QImode, flags, const0_rtx);
19351 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19354 /* Otherwise, we can get the same results by manually performing
19355 a bit extract operation on bit 5/6, and then performing the two
19356 shifts. The two methods of getting 0/1 into low/high are exactly
19357 the same size. Avoiding the shift in the bit extract case helps
19358 pentium4 a bit; no one else seems to care much either way. */
19361 enum machine_mode half_mode;
19362 rtx (*gen_lshr3)(rtx, rtx, rtx);
19363 rtx (*gen_and3)(rtx, rtx, rtx);
19364 rtx (*gen_xor3)(rtx, rtx, rtx);
19365 HOST_WIDE_INT bits;
19368 if (mode == DImode)
19370 half_mode = SImode;
19371 gen_lshr3 = gen_lshrsi3;
19372 gen_and3 = gen_andsi3;
19373 gen_xor3 = gen_xorsi3;
19378 half_mode = DImode;
19379 gen_lshr3 = gen_lshrdi3;
19380 gen_and3 = gen_anddi3;
19381 gen_xor3 = gen_xordi3;
19385 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19386 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19388 x = gen_lowpart (half_mode, operands[2]);
19389 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19391 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19392 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19393 emit_move_insn (low[0], high[0]);
19394 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19397 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19398 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19402 if (operands[1] == constm1_rtx)
19404 /* For -1 << N, we can avoid the shld instruction, because we
19405 know that we're shifting 0...31/63 ones into a -1. */
19406 emit_move_insn (low[0], constm1_rtx);
19407 if (optimize_insn_for_size_p ())
19408 emit_move_insn (high[0], low[0]);
19410 emit_move_insn (high[0], constm1_rtx);
19414 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19416 if (!rtx_equal_p (operands[0], operands[1]))
19417 emit_move_insn (operands[0], operands[1]);
19419 split_double_mode (mode, operands, 1, low, high);
19420 emit_insn (gen_shld (high[0], low[0], operands[2]));
19423 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19425 if (TARGET_CMOVE && scratch)
19427 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19428 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19430 ix86_expand_clear (scratch);
19431 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19435 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19436 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19438 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19443 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19445 rtx (*gen_ashr3)(rtx, rtx, rtx)
19446 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19447 rtx (*gen_shrd)(rtx, rtx, rtx);
19448 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19450 rtx low[2], high[2];
19453 if (CONST_INT_P (operands[2]))
19455 split_double_mode (mode, operands, 2, low, high);
19456 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19458 if (count == GET_MODE_BITSIZE (mode) - 1)
19460 emit_move_insn (high[0], high[1]);
19461 emit_insn (gen_ashr3 (high[0], high[0],
19462 GEN_INT (half_width - 1)));
19463 emit_move_insn (low[0], high[0]);
19466 else if (count >= half_width)
19468 emit_move_insn (low[0], high[1]);
19469 emit_move_insn (high[0], low[0]);
19470 emit_insn (gen_ashr3 (high[0], high[0],
19471 GEN_INT (half_width - 1)));
19473 if (count > half_width)
19474 emit_insn (gen_ashr3 (low[0], low[0],
19475 GEN_INT (count - half_width)));
19479 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19481 if (!rtx_equal_p (operands[0], operands[1]))
19482 emit_move_insn (operands[0], operands[1]);
19484 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19485 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19490 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19492 if (!rtx_equal_p (operands[0], operands[1]))
19493 emit_move_insn (operands[0], operands[1]);
19495 split_double_mode (mode, operands, 1, low, high);
19497 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19498 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19500 if (TARGET_CMOVE && scratch)
19502 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19503 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19505 emit_move_insn (scratch, high[0]);
19506 emit_insn (gen_ashr3 (scratch, scratch,
19507 GEN_INT (half_width - 1)));
19508 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19513 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19514 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19516 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19522 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19524 rtx (*gen_lshr3)(rtx, rtx, rtx)
19525 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19526 rtx (*gen_shrd)(rtx, rtx, rtx);
19527 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19529 rtx low[2], high[2];
19532 if (CONST_INT_P (operands[2]))
19534 split_double_mode (mode, operands, 2, low, high);
19535 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19537 if (count >= half_width)
19539 emit_move_insn (low[0], high[1]);
19540 ix86_expand_clear (high[0]);
19542 if (count > half_width)
19543 emit_insn (gen_lshr3 (low[0], low[0],
19544 GEN_INT (count - half_width)));
19548 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19550 if (!rtx_equal_p (operands[0], operands[1]))
19551 emit_move_insn (operands[0], operands[1]);
19553 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19554 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19559 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19561 if (!rtx_equal_p (operands[0], operands[1]))
19562 emit_move_insn (operands[0], operands[1]);
19564 split_double_mode (mode, operands, 1, low, high);
19566 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19567 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19569 if (TARGET_CMOVE && scratch)
19571 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19572 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19574 ix86_expand_clear (scratch);
19575 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19580 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19581 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19583 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19588 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19590 predict_jump (int prob)
19592 rtx insn = get_last_insn ();
19593 gcc_assert (JUMP_P (insn));
19594 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19597 /* Helper function for the string operations below.  Test whether VARIABLE
19598 is aligned to VALUE bytes.  If it is, jump to the returned label. */
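/* Sketch of the emitted test (illustrative): for VALUE == 4 this is
   roughly

       movl  variable, %tmp
       andl  $4, %tmp
       jz    label            # taken when the tested bit is clear

   where %tmp stands for a fresh scratch register.  */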
19600 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19602 rtx label = gen_label_rtx ();
19603 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19604 if (GET_MODE (variable) == DImode)
19605 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19607 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19608 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19611 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19613 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19617 /* Adjust COUNTREG by VALUE. */
19619 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19621 rtx (*gen_add)(rtx, rtx, rtx)
19622 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19624 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19627 /* Zero extend a possibly SImode EXP to a Pmode register. */
19629 ix86_zero_extend_to_Pmode (rtx exp)
19632 if (GET_MODE (exp) == VOIDmode)
19633 return force_reg (Pmode, exp);
19634 if (GET_MODE (exp) == Pmode)
19635 return copy_to_mode_reg (Pmode, exp);
19636 r = gen_reg_rtx (Pmode);
19637 emit_insn (gen_zero_extendsidi2 (r, exp));
19641 /* Divide COUNTREG by SCALE. */
19643 scale_counter (rtx countreg, int scale)
19649 if (CONST_INT_P (countreg))
19650 return GEN_INT (INTVAL (countreg) / scale);
19651 gcc_assert (REG_P (countreg));
19653 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19654 GEN_INT (exact_log2 (scale)),
19655 NULL, 1, OPTAB_DIRECT);
19659 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19660 DImode for constant loop counts. */
19662 static enum machine_mode
19663 counter_mode (rtx count_exp)
19665 if (GET_MODE (count_exp) != VOIDmode)
19666 return GET_MODE (count_exp);
19667 if (!CONST_INT_P (count_exp))
19669 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19674 /* When SRCPTR is non-NULL, output a simple loop to copy memory from the
19675 pointer SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
19676 the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
19677 output the equivalent loop to set memory to VALUE (supposed to be in MODE).
19679 The size is rounded down to a whole number of the chunk size moved at once.
19680 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
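/* Rough shape of the generated loop (an illustration, not a literal
   transcription of the RTL below):

       piece = GET_MODE_SIZE (mode) * unroll;
       size  = count & ~(piece - 1);
       if (size == 0) goto out;
       iter = 0;
     top:
       copy (or store VALUE into) UNROLL chunks of MODE at offset iter;
       iter += piece;
       if (iter < size) goto top;
     out:
       destptr += iter;        # srcptr likewise when copying
*/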
19684 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19685 rtx destptr, rtx srcptr, rtx value,
19686 rtx count, enum machine_mode mode, int unroll,
19689 rtx out_label, top_label, iter, tmp;
19690 enum machine_mode iter_mode = counter_mode (count);
19691 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19692 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19698 top_label = gen_label_rtx ();
19699 out_label = gen_label_rtx ();
19700 iter = gen_reg_rtx (iter_mode);
19702 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19703 NULL, 1, OPTAB_DIRECT);
19704 /* Those two should combine. */
19705 if (piece_size == const1_rtx)
19707 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19709 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19711 emit_move_insn (iter, const0_rtx);
19713 emit_label (top_label);
19715 tmp = convert_modes (Pmode, iter_mode, iter, true);
19716 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19717 destmem = change_address (destmem, mode, x_addr);
19721 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19722 srcmem = change_address (srcmem, mode, y_addr);
19724 /* When unrolling for chips that reorder memory reads and writes,
19725 we can save registers by using a single temporary.
19726 Also, using 4 temporaries is overkill in 32-bit mode. */
19727 if (!TARGET_64BIT && 0)
19729 for (i = 0; i < unroll; i++)
19734 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19736 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19738 emit_move_insn (destmem, srcmem);
19744 gcc_assert (unroll <= 4);
19745 for (i = 0; i < unroll; i++)
19747 tmpreg[i] = gen_reg_rtx (mode);
19751 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19753 emit_move_insn (tmpreg[i], srcmem);
19755 for (i = 0; i < unroll; i++)
19760 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19762 emit_move_insn (destmem, tmpreg[i]);
19767 for (i = 0; i < unroll; i++)
19771 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19772 emit_move_insn (destmem, value);
19775 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19776 true, OPTAB_LIB_WIDEN);
19778 emit_move_insn (iter, tmp);
19780 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19782 if (expected_size != -1)
19784 expected_size /= GET_MODE_SIZE (mode) * unroll;
19785 if (expected_size == 0)
19787 else if (expected_size > REG_BR_PROB_BASE)
19788 predict_jump (REG_BR_PROB_BASE - 1);
19790 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19793 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19794 iter = ix86_zero_extend_to_Pmode (iter);
19795 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19796 true, OPTAB_LIB_WIDEN);
19797 if (tmp != destptr)
19798 emit_move_insn (destptr, tmp);
19801 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19802 true, OPTAB_LIB_WIDEN);
19804 emit_move_insn (srcptr, tmp);
19806 emit_label (out_label);
19809 /* Output a "rep; mov" instruction.
19810 The arguments have the same meaning as for the previous function. */
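/* Sketch (illustrative): for a known byte count divisible by 4, the
   pattern emitted below behaves like

       movl  $count/4, %ecx
       rep movsl              # copy %ecx words from (%esi) to (%edi)

   with the pointer and count registers fixed by the string insn.  */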
19812 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19813 rtx destptr, rtx srcptr,
19815 enum machine_mode mode)
19820 HOST_WIDE_INT rounded_count;
19822 /* If the size is known, it is shorter to use rep movs. */
19823 if (mode == QImode && CONST_INT_P (count)
19824 && !(INTVAL (count) & 3))
19827 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19828 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19829 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19830 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19831 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19832 if (mode != QImode)
19834 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19835 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19836 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19837 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19838 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19839 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19843 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19844 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19846 if (CONST_INT_P (count))
19848 rounded_count = (INTVAL (count)
19849 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19850 destmem = shallow_copy_rtx (destmem);
19851 srcmem = shallow_copy_rtx (srcmem);
19852 set_mem_size (destmem, rounded_count);
19853 set_mem_size (srcmem, rounded_count);
19857 if (MEM_SIZE_KNOWN_P (destmem))
19858 clear_mem_size (destmem);
19859 if (MEM_SIZE_KNOWN_P (srcmem))
19860 clear_mem_size (srcmem);
19862 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19866 /* Output a "rep; stos" instruction.
19867 The arguments have the same meaning as for the previous function. */
19869 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19870 rtx count, enum machine_mode mode,
19875 HOST_WIDE_INT rounded_count;
19877 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19878 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19879 value = force_reg (mode, gen_lowpart (mode, value));
19880 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19881 if (mode != QImode)
19883 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19884 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19885 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19888 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19889 if (orig_value == const0_rtx && CONST_INT_P (count))
19891 rounded_count = (INTVAL (count)
19892 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19893 destmem = shallow_copy_rtx (destmem);
19894 set_mem_size (destmem, rounded_count);
19896 else if (MEM_SIZE_KNOWN_P (destmem))
19897 clear_mem_size (destmem);
19898 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19902 emit_strmov (rtx destmem, rtx srcmem,
19903 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19905 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19906 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19907 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19910 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
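/* For instance (illustrative): with a known count whose low four bits
   are 1101b (13 trailing bytes) and max_size == 16, the constant case
   below emits one 8-byte, one 4-byte and one 1-byte move, in that
   order.  */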
19912 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19913 rtx destptr, rtx srcptr, rtx count, int max_size)
19916 if (CONST_INT_P (count))
19918 HOST_WIDE_INT countval = INTVAL (count);
19921 if ((countval & 0x10) && max_size > 16)
19925 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19926 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19929 gcc_unreachable ();
19932 if ((countval & 0x08) && max_size > 8)
19935 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19938 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19939 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19943 if ((countval & 0x04) && max_size > 4)
19945 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19948 if ((countval & 0x02) && max_size > 2)
19950 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19953 if ((countval & 0x01) && max_size > 1)
19955 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19962 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19963 count, 1, OPTAB_DIRECT);
19964 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19965 count, QImode, 1, 4);
19969 /* When there are stringops, we can cheaply increase dest and src pointers.
19970 Otherwise we save code size by maintaining an offset (zero is readily
19971 available from the preceding rep operation) and using x86 addressing modes. */
19973 if (TARGET_SINGLE_STRINGOP)
19977 rtx label = ix86_expand_aligntest (count, 4, true);
19978 src = change_address (srcmem, SImode, srcptr);
19979 dest = change_address (destmem, SImode, destptr);
19980 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19981 emit_label (label);
19982 LABEL_NUSES (label) = 1;
19986 rtx label = ix86_expand_aligntest (count, 2, true);
19987 src = change_address (srcmem, HImode, srcptr);
19988 dest = change_address (destmem, HImode, destptr);
19989 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19990 emit_label (label);
19991 LABEL_NUSES (label) = 1;
19995 rtx label = ix86_expand_aligntest (count, 1, true);
19996 src = change_address (srcmem, QImode, srcptr);
19997 dest = change_address (destmem, QImode, destptr);
19998 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19999 emit_label (label);
20000 LABEL_NUSES (label) = 1;
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough bytes from SRC to DEST to align DEST, which is known to be
   aligned to ALIGN, up to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
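/* For example, raising the destination alignment from 1 to 8 emits up to
   three guarded copies: a 1-byte copy if bit 0 of DESTPTR is set, then a
   2-byte copy if bit 1 is set, then a 4-byte copy if bit 2 is set, each
   followed by a matching adjustment of COUNT.  */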
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
	  && (src_align_bytes & 1) == (align_bytes & 1)
	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
	set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align = 0;
	  if ((src_align_bytes & 3) == (align_bytes & 3))
	    src_align = 4;
	  else if ((src_align_bytes & 1) == (align_bytes & 1))
	    src_align = 2;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
	src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
	src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
	src_align = 2;
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Store enough into DEST to align DEST, which is known to be aligned to
   ALIGN, up to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Store enough into DST to align DST to DESIRED_ALIGN.  ALIGN_BYTES is
   how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
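/* For example, ALIGN_BYTES == 7 emits one QImode, one HImode and one
   SImode store (1 + 2 + 4 bytes, exactly the bits set in 7) before the
   main loop takes over on the now-aligned destination.  */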
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		return candidate;
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
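/* As an illustration (the numbers are made up, not taken from any real
   tuning table): a stringop_algs entry such as
     {{24, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}
   makes the size loop above return `loop' for expected sizes up to 24,
   `rep_prefix_4_byte' up to 8192, and fall back to a libcall beyond.  */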
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying a whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying a whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
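/* E.g. smallest_pow2_greater_than (7) == 8 and
   smallest_pow2_greater_than (8) == 16; the result is strictly greater
   than VAL, which the epilogue sizing in the expanders below relies on.  */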
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by the epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */
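/* Schematically, a variable-size copy expanded with the rep_prefix_8_byte
   algorithm and a desired alignment of 8 has the shape (illustrative
   only; the exact sequence depends on target, algorithm and alignment):

	if (count < epilogue_size_needed) goto epilogue;	-- step 1
	copy 1/2/4 byte pieces until dest is 8-aligned		-- step 2
	rep movsq						-- step 3
      epilogue:
	copy count & (epilogue_size_needed - 1) tail bytes	-- step 4  */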
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;
  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }
  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT) dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;
  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
	 registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, TARGET_64BIT ? 4 : 2,
				     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED
	 - 1) bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Helper function for memset expansion.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
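/* For instance, promoting the QImode constant 0xAB to SImode yields
   0xABABABAB; on the shift-and-or path below this is computed as
	reg  = 0x000000AB
	reg |= reg << 8;	(reg == 0x0000ABAB)
	reg |= reg << 16;	(reg == 0xABABABAB)  */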
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
      + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
	if (mode == SImode)
	  emit_insn (gen_movsi_insv_1 (reg, reg));
	else
	  emit_insn (gen_movdi_insv_1 (reg, reg));
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg =
	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	}
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
	return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
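/* For example, a 64-bit memset using rep_prefix_8_byte has
   size_needed == 8, so VAL is promoted all the way to DImode, while a
   byte loop with no alignment prologue leaves VAL unpromoted.  */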
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;
  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	{
	  enum machine_mode mode = SImode;
	  if (TARGET_64BIT && (count & ~0xffffffff))
	    mode = DImode;
	  count_exp = force_reg (mode, count_exp);
	}
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in the
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use the byte
	 loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
	force_loopy_epilogue = true;
      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  dst = change_address (dst, BLKmode, destreg);
	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;
  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  DImode, val_exp);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  SImode, val_exp);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  QImode, val_exp);
      break;
    }
  /* Adjust properly the offset of dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					(count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED
	 - 1) bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
					 epilogue_size_needed);
      else
	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
				epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to 4 bytes.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  emit_insn (ix86_gen_add3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; it only makes programs larger and does not speed
     them up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */
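/* Worked example for the word 0x41004242 (bytes 0x42, 0x42, 0x00, 0x41
   in memory order):
     0x41004242 + 0xfefefeff		      == 0x3fff4141
     ~0x41004242			      == 0xbeffbdbd
     0x3fff4141 & 0xbeffbdbd & 0x80808080     == 0x00800000
   which is nonzero, flagging the zero byte; a word containing no zero
   byte always produces 0.  */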
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* It seems that some optimizers do not combine a call like
	 foo (strlen (bar), strlen (bar))
	 when the move and the subtraction are done here.  They do compute
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But since &bar[strlen (bar)] is often used
	 and doing it here uses one fewer register for the lifetime of
	 output_strlen_unroll(), this is better.  */
      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
	return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For a given symbol (function), construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp = gen_reg_rtx (Pmode);
  rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2,
		  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
	XMM6_REG, XMM7_REG, XMM8_REG,
	XMM9_REG, XMM10_REG, XMM11_REG,
	XMM12_REG, XMM13_REG, XMM14_REG,
	XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
	   : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != Pmode)
	fnaddr = convert_to_mode (Pmode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
    }

  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
				       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
	vec[vec_len++]
	  = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
			     ? TImode : DImode,
			     gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
					  ? TImode : DImode,
					  clobbered_registers[i]));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
	{
	  if (cfun->machine->callee_return_avx256_p)
	    avx256 = callee_return_pass_avx256;
	  else
	    avx256 = callee_pass_avx256;
	}
      else if (cfun->machine->callee_return_avx256_p)
	avx256 = callee_return_avx256;
      else
	avx256 = call_no_avx256;

      if (reload_completed)
	emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
	vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
					 gen_rtvec (1, GEN_INT (avx256)),
					 UNSPEC_CALL_NEEDS_VZEROUPPER);
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, Pmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
	xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "rex.W jmp %A0";
      else
	xasm = "jmp\t%A0";
      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
	{
	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;
	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  If non-
	     call-exceptions, we'll have done this during epilogue emission.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}
      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";
  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";
  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  */
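/* A few concrete cases: (%eax) needs neither SIB nor displacement, so
   nothing is added here; (%ebp) and (%r13) need a disp8 (length 1);
   (%esp) and (%r12) need a SIB byte (length 1); a bare disp32 operand
   costs 4 bytes.  */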
int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add length of addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
	 || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (REG_P (addr)
	  && (addr == arg_pointer_rtx
	      || addr == frame_pointer_rtx
	      || REGNO (addr) == SP_REG
	      || REGNO (addr) == BP_REG
	      || REGNO (addr) == R12_REG
	      || REGNO (addr) == R13_REG))
	len = 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len = 4;
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) != LABEL_REF
	      && (GET_CODE (symbol) != SYMBOL_REF
		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
	      && (GET_CODE (symbol) != UNSPEC
		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
		      && XINT (symbol, 1) != UNSPEC_PCREL
		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
	    len += 1;
	}
    }

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && REG_P (base)
	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len += 1;
    }

  return len;
}
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded as 32bit sign
	     extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
	{
	  if (GET_CODE (addr) == ZERO_EXTEND)
	    addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == SUBREG)
	    addr = SUBREG_REG (addr);
	}

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	constrain_operands_cached (reload_completed);
	if (which_alternative != -1)
	  {
	    const char *constraints = recog_data.constraints[i];
	    int alt = which_alternative;

	    while (*constraints == '=' || *constraints == '+')
	      constraints++;
	    while (alt-- > 0)
	      while (*constraints++ != ',')
		;
	    /* Skip ignored operands.  */
	    if (*constraints == 'X')
	      continue;
	  }
	return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
22033 /* Compute default value for "length_vex" attribute. It includes
22034 2 or 3 byte VEX prefix and 1 opcode byte. */
22037 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
22041 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
22042 byte VEX prefix. */
22043 if (!has_0f_opcode || has_vex_w)
22046 /* We can always use 2 byte VEX prefix in 32bit. */
22050 extract_insn_cached (insn);
22052 for (i = recog_data.n_operands - 1; i >= 0; --i)
22053 if (REG_P (recog_data.operand[i]))
22055 /* REX.W bit uses 3 byte VEX prefix. */
22056 if (GET_MODE (recog_data.operand[i]) == DImode
22057 && GENERAL_REG_P (recog_data.operand[i]))
22062 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22063 if (MEM_P (recog_data.operand[i])
22064 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
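/* Illustrative sketch, not part of GCC: the 2- vs. 3-byte VEX decision
   made above, with the insn scanning replaced by explicit booleans.
   The parameters are simplified stand-ins, not GCC types.  */
#include <stdbool.h>

static int
vex_prefix_and_opcode_len (bool has_0f_opcode, bool has_vex_w,
                           bool is_64bit, bool needs_rex_w,
                           bool mem_uses_extended_reg)
{
  /* Only the 0f opcode map fits in the 2-byte form, and the VEX.W bit
     exists only in the 3-byte form.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;
  /* In 32-bit mode no REX bits can ever be needed.  */
  if (!is_64bit)
    return 2 + 1;
  /* REX.W, REX.X and REX.B have no home in the 2-byte form.  */
  if (needs_rex_w || mem_uses_extended_reg)
    return 3 + 1;
  return 2 + 1;
}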
22071 /* Return the maximum number of instructions a cpu can issue. */
22074 ix86_issue_rate (void)
22078 case PROCESSOR_PENTIUM:
22079 case PROCESSOR_ATOM:
22083 case PROCESSOR_PENTIUMPRO:
22084 case PROCESSOR_PENTIUM4:
22085 case PROCESSOR_CORE2_32:
22086 case PROCESSOR_CORE2_64:
22087 case PROCESSOR_COREI7_32:
22088 case PROCESSOR_COREI7_64:
22089 case PROCESSOR_ATHLON:
22091 case PROCESSOR_AMDFAM10:
22092 case PROCESSOR_NOCONA:
22093 case PROCESSOR_GENERIC32:
22094 case PROCESSOR_GENERIC64:
22095 case PROCESSOR_BDVER1:
22096 case PROCESSOR_BDVER2:
22097 case PROCESSOR_BTVER1:
22105 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22106 by DEP_INSN and nothing set by DEP_INSN. */
22109 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22113 /* Simplify the test for uninteresting insns. */
22114 if (insn_type != TYPE_SETCC
22115 && insn_type != TYPE_ICMOV
22116 && insn_type != TYPE_FCMOV
22117 && insn_type != TYPE_IBR)
22120 if ((set = single_set (dep_insn)) != 0)
22122 set = SET_DEST (set);
22125 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22126 && XVECLEN (PATTERN (dep_insn), 0) == 2
22127 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22128 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22130 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22131 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22136 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22139 /* This test is true if the dependent insn reads the flags but
22140 not any other potentially set register. */
22141 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22144 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22150 /* Return true iff USE_INSN has a memory address with operands set by
22151 SET_INSN. */
22154 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22157 extract_insn_cached (use_insn);
22158 for (i = recog_data.n_operands - 1; i >= 0; --i)
22159 if (MEM_P (recog_data.operand[i]))
22161 rtx addr = XEXP (recog_data.operand[i], 0);
22162 return modified_in_p (addr, set_insn) != 0;
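/* Illustrative example, not from the sources: the kind of pair this
   predicate detects.  In Pentium terms (buf is a hypothetical symbol):

       movl $buf, %ebx          <- SET_INSN writes %ebx
       movl (%ebx), %eax        <- USE_INSN's address reads %ebx

   ix86_agi_dependent returns true here, and the PROCESSOR_PENTIUM case
   below charges the extra cycle of Address Generation Interlock.  */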
22168 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22170 enum attr_type insn_type, dep_insn_type;
22171 enum attr_memory memory;
22173 int dep_insn_code_number;
22175 /* Anti and output dependencies have zero cost on all CPUs. */
22176 if (REG_NOTE_KIND (link) != 0)
22179 dep_insn_code_number = recog_memoized (dep_insn);
22181 /* If we can't recognize the insns, we can't really do anything. */
22182 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22185 insn_type = get_attr_type (insn);
22186 dep_insn_type = get_attr_type (dep_insn);
22190 case PROCESSOR_PENTIUM:
22191 /* Address Generation Interlock adds a cycle of latency. */
22192 if (insn_type == TYPE_LEA)
22194 rtx addr = PATTERN (insn);
22196 if (GET_CODE (addr) == PARALLEL)
22197 addr = XVECEXP (addr, 0, 0);
22199 gcc_assert (GET_CODE (addr) == SET);
22201 addr = SET_SRC (addr);
22202 if (modified_in_p (addr, dep_insn))
22205 else if (ix86_agi_dependent (dep_insn, insn))
22208 /* ??? Compares pair with jump/setcc. */
22209 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22212 /* Floating point stores require value to be ready one cycle earlier. */
22213 if (insn_type == TYPE_FMOV
22214 && get_attr_memory (insn) == MEMORY_STORE
22215 && !ix86_agi_dependent (dep_insn, insn))
22219 case PROCESSOR_PENTIUMPRO:
22220 memory = get_attr_memory (insn);
22222 /* INT->FP conversion is expensive. */
22223 if (get_attr_fp_int_src (dep_insn))
22226 /* There is one cycle extra latency between an FP op and a store. */
22227 if (insn_type == TYPE_FMOV
22228 && (set = single_set (dep_insn)) != NULL_RTX
22229 && (set2 = single_set (insn)) != NULL_RTX
22230 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22231 && MEM_P (SET_DEST (set2)))
22234 /* Show ability of reorder buffer to hide latency of load by executing
22235 in parallel with previous instruction in case
22236 previous instruction is not needed to compute the address. */
22237 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22238 && !ix86_agi_dependent (dep_insn, insn))
22240 /* Claim moves to take one cycle, as the core can issue one load
22241 at a time and the next load can start a cycle later. */
22242 if (dep_insn_type == TYPE_IMOV
22243 || dep_insn_type == TYPE_FMOV)
22251 memory = get_attr_memory (insn);
22253 /* The esp dependency is resolved before the instruction is really
22254 finished. */
22255 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22256 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22259 /* INT->FP conversion is expensive. */
22260 if (get_attr_fp_int_src (dep_insn))
22263 /* Show ability of reorder buffer to hide latency of load by executing
22264 in parallel with previous instruction in case
22265 previous instruction is not needed to compute the address. */
22266 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22267 && !ix86_agi_dependent (dep_insn, insn))
22269 /* Claim moves to take one cycle, as the core can issue one load
22270 at a time and the next load can start a cycle later. */
22271 if (dep_insn_type == TYPE_IMOV
22272 || dep_insn_type == TYPE_FMOV)
22281 case PROCESSOR_ATHLON:
22283 case PROCESSOR_AMDFAM10:
22284 case PROCESSOR_BDVER1:
22285 case PROCESSOR_BDVER2:
22286 case PROCESSOR_BTVER1:
22287 case PROCESSOR_ATOM:
22288 case PROCESSOR_GENERIC32:
22289 case PROCESSOR_GENERIC64:
22290 memory = get_attr_memory (insn);
22292 /* Show ability of reorder buffer to hide latency of load by executing
22293 in parallel with previous instruction in case
22294 previous instruction is not needed to compute the address. */
22295 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22296 && !ix86_agi_dependent (dep_insn, insn))
22298 enum attr_unit unit = get_attr_unit (insn);
22301 /* Because of the difference between the length of integer and
22302 floating unit pipeline preparation stages, the memory operands
22303 for floating point are cheaper.
22305 ??? For Athlon the difference is most probably 2. */
22306 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22309 loadcost = TARGET_ATHLON ? 2 : 0;
22311 if (cost >= loadcost)
22324 /* How many alternative schedules to try. This should be as wide as the
22325 scheduling freedom in the DFA, but no wider. Making this value too
22326 large results in extra work for the scheduler. */
22329 ia32_multipass_dfa_lookahead (void)
22333 case PROCESSOR_PENTIUM:
22336 case PROCESSOR_PENTIUMPRO:
22340 case PROCESSOR_CORE2_32:
22341 case PROCESSOR_CORE2_64:
22342 case PROCESSOR_COREI7_32:
22343 case PROCESSOR_COREI7_64:
22344 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22345 as many instructions can be executed on a cycle, i.e.,
22346 issue_rate. I wonder why tuning for many CPUs does not do this. */
22347 return ix86_issue_rate ();
22356 /* Model decoder of Core 2/i7.
22357 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22358 track the instruction fetch block boundaries and make sure that long
22359 (9+ bytes) instructions are assigned to D0. */
22361 /* Maximum length of an insn that can be handled by
22362 a secondary decoder unit. '8' for Core 2/i7. */
22363 static int core2i7_secondary_decoder_max_insn_size;
22365 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22366 '16' for Core 2/i7. */
22367 static int core2i7_ifetch_block_size;
22369 /* Maximum number of instructions decoder can handle per cycle.
22370 '6' for Core 2/i7. */
22371 static int core2i7_ifetch_block_max_insns;
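/* Illustrative sketch, not part of GCC: the decoder model in miniature.
   Given insn sizes in issue order, count how many a modeled Core 2/i7
   front end accepts in one cycle under the three limits above.  */
static int
decoder_model_insns_per_cycle (const int *insn_size, int n_insns)
{
  int block_len = 0, n_accepted = 0, i;

  for (i = 0; i < n_insns; i++)
    {
      /* After the first insn only secondary decoders are free, and
         they cannot take a long (9+ byte) insn.  */
      if (n_accepted > 0 && insn_size[i] > 8)
        break;
      /* Respect the 16-byte ifetch block and the 6-insn decode limit.  */
      if (block_len + insn_size[i] > 16 || n_accepted + 1 > 6)
        break;
      block_len += insn_size[i];
      n_accepted++;
    }
  return n_accepted;
}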
22373 typedef struct ix86_first_cycle_multipass_data_ *
22374 ix86_first_cycle_multipass_data_t;
22375 typedef const struct ix86_first_cycle_multipass_data_ *
22376 const_ix86_first_cycle_multipass_data_t;
22378 /* A variable to store target state across calls to max_issue within
22379 one cycle. */
22380 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22381 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22383 /* Initialize DATA. */
22385 core2i7_first_cycle_multipass_init (void *_data)
22387 ix86_first_cycle_multipass_data_t data
22388 = (ix86_first_cycle_multipass_data_t) _data;
22390 data->ifetch_block_len = 0;
22391 data->ifetch_block_n_insns = 0;
22392 data->ready_try_change = NULL;
22393 data->ready_try_change_size = 0;
22396 /* Advancing the cycle; reset ifetch block counts. */
22398 core2i7_dfa_post_advance_cycle (void)
22400 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22402 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22404 data->ifetch_block_len = 0;
22405 data->ifetch_block_n_insns = 0;
22408 static int min_insn_size (rtx);
22410 /* Filter out insns from ready_try that the core will not be able to issue
22411 on the current cycle due to decoder restrictions. */
22413 core2i7_first_cycle_multipass_filter_ready_try
22414 (const_ix86_first_cycle_multipass_data_t data,
22415 char *ready_try, int n_ready, bool first_cycle_insn_p)
22422 if (ready_try[n_ready])
22425 insn = get_ready_element (n_ready);
22426 insn_size = min_insn_size (insn);
22428 if (/* If this insn is too long for a secondary decoder ... */
22429 (!first_cycle_insn_p
22430 && insn_size > core2i7_secondary_decoder_max_insn_size)
22431 /* ... or it would not fit into the ifetch block ... */
22432 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22433 /* ... or the decoder is full already ... */
22434 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22435 /* ... mask the insn out. */
22437 ready_try[n_ready] = 1;
22439 if (data->ready_try_change)
22440 SET_BIT (data->ready_try_change, n_ready);
22445 /* Prepare for a new round of multipass lookahead scheduling. */
22447 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22448 bool first_cycle_insn_p)
22450 ix86_first_cycle_multipass_data_t data
22451 = (ix86_first_cycle_multipass_data_t) _data;
22452 const_ix86_first_cycle_multipass_data_t prev_data
22453 = ix86_first_cycle_multipass_data;
22455 /* Restore the state from the end of the previous round. */
22456 data->ifetch_block_len = prev_data->ifetch_block_len;
22457 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22459 /* Filter instructions that cannot be issued on current cycle due to
22460 decoder restrictions. */
22461 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22462 first_cycle_insn_p);
22465 /* INSN is being issued in the current solution. Account for its impact on
22466 the decoder model. */
22468 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22469 rtx insn, const void *_prev_data)
22471 ix86_first_cycle_multipass_data_t data
22472 = (ix86_first_cycle_multipass_data_t) _data;
22473 const_ix86_first_cycle_multipass_data_t prev_data
22474 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22476 int insn_size = min_insn_size (insn);
22478 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22479 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22480 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22481 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22483 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22484 if (!data->ready_try_change)
22486 data->ready_try_change = sbitmap_alloc (n_ready);
22487 data->ready_try_change_size = n_ready;
22489 else if (data->ready_try_change_size < n_ready)
22491 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22493 data->ready_try_change_size = n_ready;
22495 sbitmap_zero (data->ready_try_change);
22497 /* Filter out insns from ready_try that the core will not be able to issue
22498 on the current cycle due to decoder restrictions. */
22499 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22503 /* Revert the effect on ready_try. */
22505 core2i7_first_cycle_multipass_backtrack (const void *_data,
22507 int n_ready ATTRIBUTE_UNUSED)
22509 const_ix86_first_cycle_multipass_data_t data
22510 = (const_ix86_first_cycle_multipass_data_t) _data;
22511 unsigned int i = 0;
22512 sbitmap_iterator sbi;
22514 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22515 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22521 /* Save the result of multipass lookahead scheduling for the next round. */
22523 core2i7_first_cycle_multipass_end (const void *_data)
22525 const_ix86_first_cycle_multipass_data_t data
22526 = (const_ix86_first_cycle_multipass_data_t) _data;
22527 ix86_first_cycle_multipass_data_t next_data
22528 = ix86_first_cycle_multipass_data;
22532 next_data->ifetch_block_len = data->ifetch_block_len;
22533 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22537 /* Deallocate target data. */
22539 core2i7_first_cycle_multipass_fini (void *_data)
22541 ix86_first_cycle_multipass_data_t data
22542 = (ix86_first_cycle_multipass_data_t) _data;
22544 if (data->ready_try_change)
22546 sbitmap_free (data->ready_try_change);
22547 data->ready_try_change = NULL;
22548 data->ready_try_change_size = 0;
22552 /* Prepare for scheduling pass. */
22554 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22555 int verbose ATTRIBUTE_UNUSED,
22556 int max_uid ATTRIBUTE_UNUSED)
22558 /* Install scheduling hooks for current CPU. Some of these hooks are used
22559 in time-critical parts of the scheduler, so we only set them up when
22560 they are actually used. */
22563 case PROCESSOR_CORE2_32:
22564 case PROCESSOR_CORE2_64:
22565 case PROCESSOR_COREI7_32:
22566 case PROCESSOR_COREI7_64:
22567 targetm.sched.dfa_post_advance_cycle
22568 = core2i7_dfa_post_advance_cycle;
22569 targetm.sched.first_cycle_multipass_init
22570 = core2i7_first_cycle_multipass_init;
22571 targetm.sched.first_cycle_multipass_begin
22572 = core2i7_first_cycle_multipass_begin;
22573 targetm.sched.first_cycle_multipass_issue
22574 = core2i7_first_cycle_multipass_issue;
22575 targetm.sched.first_cycle_multipass_backtrack
22576 = core2i7_first_cycle_multipass_backtrack;
22577 targetm.sched.first_cycle_multipass_end
22578 = core2i7_first_cycle_multipass_end;
22579 targetm.sched.first_cycle_multipass_fini
22580 = core2i7_first_cycle_multipass_fini;
22582 /* Set decoder parameters. */
22583 core2i7_secondary_decoder_max_insn_size = 8;
22584 core2i7_ifetch_block_size = 16;
22585 core2i7_ifetch_block_max_insns = 6;
22589 targetm.sched.dfa_post_advance_cycle = NULL;
22590 targetm.sched.first_cycle_multipass_init = NULL;
22591 targetm.sched.first_cycle_multipass_begin = NULL;
22592 targetm.sched.first_cycle_multipass_issue = NULL;
22593 targetm.sched.first_cycle_multipass_backtrack = NULL;
22594 targetm.sched.first_cycle_multipass_end = NULL;
22595 targetm.sched.first_cycle_multipass_fini = NULL;
22601 /* Compute the alignment given to a constant that is being placed in memory.
22602 EXP is the constant and ALIGN is the alignment that the object would
22603 ordinarily have.
22604 The value of this function is used instead of that alignment to align
22605 the object. */
22608 ix86_constant_alignment (tree exp, int align)
22610 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22611 || TREE_CODE (exp) == INTEGER_CST)
22613 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22615 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22618 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22619 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22620 return BITS_PER_WORD;
22625 /* Compute the alignment for a static variable.
22626 TYPE is the data type, and ALIGN is the alignment that
22627 the object would ordinarily have. The value of this function is used
22628 instead of that alignment to align the object. */
22631 ix86_data_alignment (tree type, int align)
22633 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22635 if (AGGREGATE_TYPE_P (type)
22636 && TYPE_SIZE (type)
22637 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22638 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22639 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22640 && align < max_align)
22643 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22644 to a 16-byte boundary. */
22647 if (AGGREGATE_TYPE_P (type)
22648 && TYPE_SIZE (type)
22649 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22650 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22651 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22655 if (TREE_CODE (type) == ARRAY_TYPE)
22657 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22659 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22662 else if (TREE_CODE (type) == COMPLEX_TYPE)
22665 if (TYPE_MODE (type) == DCmode && align < 64)
22667 if ((TYPE_MODE (type) == XCmode
22668 || TYPE_MODE (type) == TCmode) && align < 128)
22671 else if ((TREE_CODE (type) == RECORD_TYPE
22672 || TREE_CODE (type) == UNION_TYPE
22673 || TREE_CODE (type) == QUAL_UNION_TYPE)
22674 && TYPE_FIELDS (type))
22676 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22678 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22681 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22682 || TREE_CODE (type) == INTEGER_TYPE)
22684 if (TYPE_MODE (type) == DFmode && align < 64)
22686 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22693 /* Compute the alignment for a local variable or a stack slot. EXP is
22694 the data type or decl itself, MODE is the widest mode available and
22695 ALIGN is the alignment that the object would ordinarily have. The
22696 value of this macro is used instead of that alignment to align the
22697 object. */
22700 ix86_local_alignment (tree exp, enum machine_mode mode,
22701 unsigned int align)
22705 if (exp && DECL_P (exp))
22707 type = TREE_TYPE (exp);
22716 /* Don't do dynamic stack realignment for long long objects with
22717 -mpreferred-stack-boundary=2. */
22720 && ix86_preferred_stack_boundary < 64
22721 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22722 && (!type || !TYPE_USER_ALIGN (type))
22723 && (!decl || !DECL_USER_ALIGN (decl)))
22726 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22727 register in MODE. We will return the largest alignment of XF
22728 and DF. */
22731 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22732 align = GET_MODE_ALIGNMENT (DFmode);
22736 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22737 to a 16-byte boundary. The exact wording is:
22739 An array uses the same alignment as its elements, except that a local or
22740 global array variable of length at least 16 bytes or
22741 a C99 variable-length array variable always has alignment of at least 16 bytes.
22743 This was added to allow use of aligned SSE instructions on arrays. The
22744 rule is meant for static storage (where the compiler cannot do the
22745 analysis by itself). We follow it for automatic variables only when
22746 convenient. We fully control everything in the function being compiled,
22747 and functions from other units cannot rely on the alignment.
22749 Exclude the va_list type. It is the common case of a local array where
22750 we cannot benefit from the alignment. */
22751 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22754 if (AGGREGATE_TYPE_P (type)
22755 && (va_list_type_node == NULL_TREE
22756 || (TYPE_MAIN_VARIANT (type)
22757 != TYPE_MAIN_VARIANT (va_list_type_node)))
22758 && TYPE_SIZE (type)
22759 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22760 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22761 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22764 if (TREE_CODE (type) == ARRAY_TYPE)
22766 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22768 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22771 else if (TREE_CODE (type) == COMPLEX_TYPE)
22773 if (TYPE_MODE (type) == DCmode && align < 64)
22775 if ((TYPE_MODE (type) == XCmode
22776 || TYPE_MODE (type) == TCmode) && align < 128)
22779 else if ((TREE_CODE (type) == RECORD_TYPE
22780 || TREE_CODE (type) == UNION_TYPE
22781 || TREE_CODE (type) == QUAL_UNION_TYPE)
22782 && TYPE_FIELDS (type))
22784 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22786 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22789 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22790 || TREE_CODE (type) == INTEGER_TYPE)
22793 if (TYPE_MODE (type) == DFmode && align < 64)
22795 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22801 /* Compute the minimum required alignment for dynamic stack realignment
22802 purposes for a local variable, parameter or a stack slot. EXP is
22803 the data type or decl itself, MODE is its mode and ALIGN is the
22804 alignment that the object would ordinarily have. */
22807 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22808 unsigned int align)
22812 if (exp && DECL_P (exp))
22814 type = TREE_TYPE (exp);
22823 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22826 /* Don't do dynamic stack realignment for long long objects with
22827 -mpreferred-stack-boundary=2. */
22828 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22829 && (!type || !TYPE_USER_ALIGN (type))
22830 && (!decl || !DECL_USER_ALIGN (decl)))
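/* Illustrative sketch, not part of GCC: the 16-byte aggregate rule the
   alignment hooks above share, using plain byte counts instead of
   trees.  SIZE_BYTES and ALIGN_BITS are hypothetical inputs.  */
#include <stdbool.h>

static unsigned int
abi_array_align_sketch (unsigned int size_bytes, unsigned int align_bits,
                        bool is_aggregate)
{
  /* Aggregates of at least 16 bytes get at least 16-byte (128-bit)
     alignment on x86-64.  */
  if (is_aggregate && size_bytes >= 16 && align_bits < 128)
    align_bits = 128;
  return align_bits;
}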
22836 /* Find a location for the static chain incoming to a nested function.
22837 This is a register, unless all free registers are used by arguments. */
22840 ix86_static_chain (const_tree fndecl, bool incoming_p)
22844 if (!DECL_STATIC_CHAIN (fndecl))
22849 /* We always use R10 in 64-bit mode. */
22857 /* By default in 32-bit mode we use ECX to pass the static chain. */
22860 fntype = TREE_TYPE (fndecl);
22861 ccvt = ix86_get_callcvt (fntype);
22862 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
22864 /* Fastcall functions use ecx/edx for arguments, which leaves
22865 us with EAX for the static chain.
22866 Thiscall functions use ecx for arguments, which also
22867 leaves us with EAX for the static chain. */
22870 else if (ix86_function_regparm (fntype, fndecl) == 3)
22872 /* For regparm 3, we have no free call-clobbered registers in
22873 which to store the static chain. In order to implement this,
22874 we have the trampoline push the static chain to the stack.
22875 However, we can't push a value below the return address when
22876 we call the nested function directly, so we have to use an
22877 alternate entry point. For this we use ESI, and have the
22878 alternate entry point push ESI, so that things appear the
22879 same once we're executing the nested function. */
22882 if (fndecl == current_function_decl)
22883 ix86_static_chain_on_stack = true;
22884 return gen_frame_mem (SImode,
22885 plus_constant (arg_pointer_rtx, -8));
22891 return gen_rtx_REG (Pmode, regno);
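/* Illustrative sketch, not from the sources: the stack layout the
   regparm-3 case above arranges, with AP standing for the incoming
   argument pointer on a 32-bit target:

       AP+0:  first stack argument
       AP-4:  return address         <- pushed by the caller's CALL
       AP-8:  static chain value     <- pushed by the trampoline
                                        (hence arg_pointer - 8 above)

   A direct call enters at the alternate entry point instead, whose
   "push %esi" recreates the same layout.  */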
22894 /* Emit RTL insns to initialize the variable parts of a trampoline.
22895 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22896 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22897 to be passed to the target function. */
22900 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22906 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22912 /* Load the function address to r11. Try to load address using
22913 the shorter movl instead of movabs. We may want to support
22914 movq for kernel mode, but the kernel does not use trampolines at
22915 the moment. */
22916 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22918 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22920 mem = adjust_address (m_tramp, HImode, offset);
22921 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22923 mem = adjust_address (m_tramp, SImode, offset + 2);
22924 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22929 mem = adjust_address (m_tramp, HImode, offset);
22930 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22932 mem = adjust_address (m_tramp, DImode, offset + 2);
22933 emit_move_insn (mem, fnaddr);
22937 /* Load static chain using movabs to r10. Use the
22938 shorter movl instead of movabs for x32. */
22950 mem = adjust_address (m_tramp, HImode, offset);
22951 emit_move_insn (mem, gen_int_mode (opcode, HImode));
22953 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
22954 emit_move_insn (mem, chain_value);
22957 /* Jump to r11; the last (unused) byte is a nop, only there to
22958 pad the write out to a single 32-bit store. */
22959 mem = adjust_address (m_tramp, SImode, offset);
22960 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22967 /* Depending on the static chain location, either load a register
22968 with a constant, or push the constant to the stack. All of the
22969 instructions are the same size. */
22970 chain = ix86_static_chain (fndecl, true);
22973 switch (REGNO (chain))
22976 opcode = 0xb8; break;
22978 opcode = 0xb9; break;
22980 gcc_unreachable ();
22986 mem = adjust_address (m_tramp, QImode, offset);
22987 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22989 mem = adjust_address (m_tramp, SImode, offset + 1);
22990 emit_move_insn (mem, chain_value);
22993 mem = adjust_address (m_tramp, QImode, offset);
22994 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22996 mem = adjust_address (m_tramp, SImode, offset + 1);
22998 /* Compute offset from the end of the jmp to the target function.
22999 In the case in which the trampoline stores the static chain on
23000 the stack, we need to skip the first insn which pushes the
23001 (call-saved) register static chain; this push is 1 byte. */
23003 disp = expand_binop (SImode, sub_optab, fnaddr,
23004 plus_constant (XEXP (m_tramp, 0),
23005 offset - (MEM_P (chain) ? 1 : 0)),
23006 NULL_RTX, 1, OPTAB_DIRECT);
23007 emit_move_insn (mem, disp);
23010 gcc_assert (offset <= TRAMPOLINE_SIZE);
23012 #ifdef HAVE_ENABLE_EXECUTE_STACK
23013 #ifdef CHECK_EXECUTE_STACK_ENABLED
23014 if (CHECK_EXECUTE_STACK_ENABLED)
23016 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23017 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
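/* Illustrative sketch, not from the sources: the 64-bit trampoline bytes
   the code above emits when both constants need the movabs form.  The
   opcode byte for the static-chain load lives in lines elided here, so
   the 49 ba pair is inferred from the x86 encoding, not quoted:

       49 bb <imm64>    movabs $fnaddr, %r11
       49 ba <imm64>    movabs $chain,  %r10
       49 ff e3         jmpq   *%r11
       90               nop    (pads the jump out to one 32-bit store)  */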
23021 /* The following file contains several enumerations and data structures
23022 built from the definitions in i386-builtin-types.def. */
23024 #include "i386-builtin-types.inc"
23026 /* Table for the ix86 builtin non-function types. */
23027 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23029 /* Retrieve an element from the above table, building some of
23030 the types lazily. */
23033 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23035 unsigned int index;
23038 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23040 type = ix86_builtin_type_tab[(int) tcode];
23044 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23045 if (tcode <= IX86_BT_LAST_VECT)
23047 enum machine_mode mode;
23049 index = tcode - IX86_BT_LAST_PRIM - 1;
23050 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23051 mode = ix86_builtin_type_vect_mode[index];
23053 type = build_vector_type_for_mode (itype, mode);
23059 index = tcode - IX86_BT_LAST_VECT - 1;
23060 if (tcode <= IX86_BT_LAST_PTR)
23061 quals = TYPE_UNQUALIFIED;
23063 quals = TYPE_QUAL_CONST;
23065 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23066 if (quals != TYPE_UNQUALIFIED)
23067 itype = build_qualified_type (itype, quals);
23069 type = build_pointer_type (itype);
23072 ix86_builtin_type_tab[(int) tcode] = type;
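/* Illustrative sketch, not part of GCC: the build-on-first-use
   memoization pattern the two lookup routines here follow, in
   miniature.  A table entry stays NULL until first requested.  */
#include <stddef.h>

#define SKETCH_N_KINDS 4
static const char *sketch_tab[SKETCH_N_KINDS];

static const char *
sketch_get (int kind)
{
  if (sketch_tab[kind] == NULL)
    sketch_tab[kind] = "built lazily";  /* stand-in for real construction */
  return sketch_tab[kind];
}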
23076 /* Table for the ix86 builtin function types. */
23077 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23079 /* Retrieve an element from the above table, building some of
23080 the types lazily. */
23083 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23087 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23089 type = ix86_builtin_func_type_tab[(int) tcode];
23093 if (tcode <= IX86_BT_LAST_FUNC)
23095 unsigned start = ix86_builtin_func_start[(int) tcode];
23096 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23097 tree rtype, atype, args = void_list_node;
23100 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23101 for (i = after - 1; i > start; --i)
23103 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23104 args = tree_cons (NULL, atype, args);
23107 type = build_function_type (rtype, args);
23111 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23112 enum ix86_builtin_func_type icode;
23114 icode = ix86_builtin_func_alias_base[index];
23115 type = ix86_get_builtin_func_type (icode);
23118 ix86_builtin_func_type_tab[(int) tcode] = type;
23123 /* Codes for all the SSE/MMX builtins. */
23126 IX86_BUILTIN_ADDPS,
23127 IX86_BUILTIN_ADDSS,
23128 IX86_BUILTIN_DIVPS,
23129 IX86_BUILTIN_DIVSS,
23130 IX86_BUILTIN_MULPS,
23131 IX86_BUILTIN_MULSS,
23132 IX86_BUILTIN_SUBPS,
23133 IX86_BUILTIN_SUBSS,
23135 IX86_BUILTIN_CMPEQPS,
23136 IX86_BUILTIN_CMPLTPS,
23137 IX86_BUILTIN_CMPLEPS,
23138 IX86_BUILTIN_CMPGTPS,
23139 IX86_BUILTIN_CMPGEPS,
23140 IX86_BUILTIN_CMPNEQPS,
23141 IX86_BUILTIN_CMPNLTPS,
23142 IX86_BUILTIN_CMPNLEPS,
23143 IX86_BUILTIN_CMPNGTPS,
23144 IX86_BUILTIN_CMPNGEPS,
23145 IX86_BUILTIN_CMPORDPS,
23146 IX86_BUILTIN_CMPUNORDPS,
23147 IX86_BUILTIN_CMPEQSS,
23148 IX86_BUILTIN_CMPLTSS,
23149 IX86_BUILTIN_CMPLESS,
23150 IX86_BUILTIN_CMPNEQSS,
23151 IX86_BUILTIN_CMPNLTSS,
23152 IX86_BUILTIN_CMPNLESS,
23153 IX86_BUILTIN_CMPNGTSS,
23154 IX86_BUILTIN_CMPNGESS,
23155 IX86_BUILTIN_CMPORDSS,
23156 IX86_BUILTIN_CMPUNORDSS,
23158 IX86_BUILTIN_COMIEQSS,
23159 IX86_BUILTIN_COMILTSS,
23160 IX86_BUILTIN_COMILESS,
23161 IX86_BUILTIN_COMIGTSS,
23162 IX86_BUILTIN_COMIGESS,
23163 IX86_BUILTIN_COMINEQSS,
23164 IX86_BUILTIN_UCOMIEQSS,
23165 IX86_BUILTIN_UCOMILTSS,
23166 IX86_BUILTIN_UCOMILESS,
23167 IX86_BUILTIN_UCOMIGTSS,
23168 IX86_BUILTIN_UCOMIGESS,
23169 IX86_BUILTIN_UCOMINEQSS,
23171 IX86_BUILTIN_CVTPI2PS,
23172 IX86_BUILTIN_CVTPS2PI,
23173 IX86_BUILTIN_CVTSI2SS,
23174 IX86_BUILTIN_CVTSI642SS,
23175 IX86_BUILTIN_CVTSS2SI,
23176 IX86_BUILTIN_CVTSS2SI64,
23177 IX86_BUILTIN_CVTTPS2PI,
23178 IX86_BUILTIN_CVTTSS2SI,
23179 IX86_BUILTIN_CVTTSS2SI64,
23181 IX86_BUILTIN_MAXPS,
23182 IX86_BUILTIN_MAXSS,
23183 IX86_BUILTIN_MINPS,
23184 IX86_BUILTIN_MINSS,
23186 IX86_BUILTIN_LOADUPS,
23187 IX86_BUILTIN_STOREUPS,
23188 IX86_BUILTIN_MOVSS,
23190 IX86_BUILTIN_MOVHLPS,
23191 IX86_BUILTIN_MOVLHPS,
23192 IX86_BUILTIN_LOADHPS,
23193 IX86_BUILTIN_LOADLPS,
23194 IX86_BUILTIN_STOREHPS,
23195 IX86_BUILTIN_STORELPS,
23197 IX86_BUILTIN_MASKMOVQ,
23198 IX86_BUILTIN_MOVMSKPS,
23199 IX86_BUILTIN_PMOVMSKB,
23201 IX86_BUILTIN_MOVNTPS,
23202 IX86_BUILTIN_MOVNTQ,
23204 IX86_BUILTIN_LOADDQU,
23205 IX86_BUILTIN_STOREDQU,
23207 IX86_BUILTIN_PACKSSWB,
23208 IX86_BUILTIN_PACKSSDW,
23209 IX86_BUILTIN_PACKUSWB,
23211 IX86_BUILTIN_PADDB,
23212 IX86_BUILTIN_PADDW,
23213 IX86_BUILTIN_PADDD,
23214 IX86_BUILTIN_PADDQ,
23215 IX86_BUILTIN_PADDSB,
23216 IX86_BUILTIN_PADDSW,
23217 IX86_BUILTIN_PADDUSB,
23218 IX86_BUILTIN_PADDUSW,
23219 IX86_BUILTIN_PSUBB,
23220 IX86_BUILTIN_PSUBW,
23221 IX86_BUILTIN_PSUBD,
23222 IX86_BUILTIN_PSUBQ,
23223 IX86_BUILTIN_PSUBSB,
23224 IX86_BUILTIN_PSUBSW,
23225 IX86_BUILTIN_PSUBUSB,
23226 IX86_BUILTIN_PSUBUSW,
23229 IX86_BUILTIN_PANDN,
23233 IX86_BUILTIN_PAVGB,
23234 IX86_BUILTIN_PAVGW,
23236 IX86_BUILTIN_PCMPEQB,
23237 IX86_BUILTIN_PCMPEQW,
23238 IX86_BUILTIN_PCMPEQD,
23239 IX86_BUILTIN_PCMPGTB,
23240 IX86_BUILTIN_PCMPGTW,
23241 IX86_BUILTIN_PCMPGTD,
23243 IX86_BUILTIN_PMADDWD,
23245 IX86_BUILTIN_PMAXSW,
23246 IX86_BUILTIN_PMAXUB,
23247 IX86_BUILTIN_PMINSW,
23248 IX86_BUILTIN_PMINUB,
23250 IX86_BUILTIN_PMULHUW,
23251 IX86_BUILTIN_PMULHW,
23252 IX86_BUILTIN_PMULLW,
23254 IX86_BUILTIN_PSADBW,
23255 IX86_BUILTIN_PSHUFW,
23257 IX86_BUILTIN_PSLLW,
23258 IX86_BUILTIN_PSLLD,
23259 IX86_BUILTIN_PSLLQ,
23260 IX86_BUILTIN_PSRAW,
23261 IX86_BUILTIN_PSRAD,
23262 IX86_BUILTIN_PSRLW,
23263 IX86_BUILTIN_PSRLD,
23264 IX86_BUILTIN_PSRLQ,
23265 IX86_BUILTIN_PSLLWI,
23266 IX86_BUILTIN_PSLLDI,
23267 IX86_BUILTIN_PSLLQI,
23268 IX86_BUILTIN_PSRAWI,
23269 IX86_BUILTIN_PSRADI,
23270 IX86_BUILTIN_PSRLWI,
23271 IX86_BUILTIN_PSRLDI,
23272 IX86_BUILTIN_PSRLQI,
23274 IX86_BUILTIN_PUNPCKHBW,
23275 IX86_BUILTIN_PUNPCKHWD,
23276 IX86_BUILTIN_PUNPCKHDQ,
23277 IX86_BUILTIN_PUNPCKLBW,
23278 IX86_BUILTIN_PUNPCKLWD,
23279 IX86_BUILTIN_PUNPCKLDQ,
23281 IX86_BUILTIN_SHUFPS,
23283 IX86_BUILTIN_RCPPS,
23284 IX86_BUILTIN_RCPSS,
23285 IX86_BUILTIN_RSQRTPS,
23286 IX86_BUILTIN_RSQRTPS_NR,
23287 IX86_BUILTIN_RSQRTSS,
23288 IX86_BUILTIN_RSQRTF,
23289 IX86_BUILTIN_SQRTPS,
23290 IX86_BUILTIN_SQRTPS_NR,
23291 IX86_BUILTIN_SQRTSS,
23293 IX86_BUILTIN_UNPCKHPS,
23294 IX86_BUILTIN_UNPCKLPS,
23296 IX86_BUILTIN_ANDPS,
23297 IX86_BUILTIN_ANDNPS,
23299 IX86_BUILTIN_XORPS,
23302 IX86_BUILTIN_LDMXCSR,
23303 IX86_BUILTIN_STMXCSR,
23304 IX86_BUILTIN_SFENCE,
23306 /* 3DNow! Original */
23307 IX86_BUILTIN_FEMMS,
23308 IX86_BUILTIN_PAVGUSB,
23309 IX86_BUILTIN_PF2ID,
23310 IX86_BUILTIN_PFACC,
23311 IX86_BUILTIN_PFADD,
23312 IX86_BUILTIN_PFCMPEQ,
23313 IX86_BUILTIN_PFCMPGE,
23314 IX86_BUILTIN_PFCMPGT,
23315 IX86_BUILTIN_PFMAX,
23316 IX86_BUILTIN_PFMIN,
23317 IX86_BUILTIN_PFMUL,
23318 IX86_BUILTIN_PFRCP,
23319 IX86_BUILTIN_PFRCPIT1,
23320 IX86_BUILTIN_PFRCPIT2,
23321 IX86_BUILTIN_PFRSQIT1,
23322 IX86_BUILTIN_PFRSQRT,
23323 IX86_BUILTIN_PFSUB,
23324 IX86_BUILTIN_PFSUBR,
23325 IX86_BUILTIN_PI2FD,
23326 IX86_BUILTIN_PMULHRW,
23328 /* 3DNow! Athlon Extensions */
23329 IX86_BUILTIN_PF2IW,
23330 IX86_BUILTIN_PFNACC,
23331 IX86_BUILTIN_PFPNACC,
23332 IX86_BUILTIN_PI2FW,
23333 IX86_BUILTIN_PSWAPDSI,
23334 IX86_BUILTIN_PSWAPDSF,
23337 IX86_BUILTIN_ADDPD,
23338 IX86_BUILTIN_ADDSD,
23339 IX86_BUILTIN_DIVPD,
23340 IX86_BUILTIN_DIVSD,
23341 IX86_BUILTIN_MULPD,
23342 IX86_BUILTIN_MULSD,
23343 IX86_BUILTIN_SUBPD,
23344 IX86_BUILTIN_SUBSD,
23346 IX86_BUILTIN_CMPEQPD,
23347 IX86_BUILTIN_CMPLTPD,
23348 IX86_BUILTIN_CMPLEPD,
23349 IX86_BUILTIN_CMPGTPD,
23350 IX86_BUILTIN_CMPGEPD,
23351 IX86_BUILTIN_CMPNEQPD,
23352 IX86_BUILTIN_CMPNLTPD,
23353 IX86_BUILTIN_CMPNLEPD,
23354 IX86_BUILTIN_CMPNGTPD,
23355 IX86_BUILTIN_CMPNGEPD,
23356 IX86_BUILTIN_CMPORDPD,
23357 IX86_BUILTIN_CMPUNORDPD,
23358 IX86_BUILTIN_CMPEQSD,
23359 IX86_BUILTIN_CMPLTSD,
23360 IX86_BUILTIN_CMPLESD,
23361 IX86_BUILTIN_CMPNEQSD,
23362 IX86_BUILTIN_CMPNLTSD,
23363 IX86_BUILTIN_CMPNLESD,
23364 IX86_BUILTIN_CMPORDSD,
23365 IX86_BUILTIN_CMPUNORDSD,
23367 IX86_BUILTIN_COMIEQSD,
23368 IX86_BUILTIN_COMILTSD,
23369 IX86_BUILTIN_COMILESD,
23370 IX86_BUILTIN_COMIGTSD,
23371 IX86_BUILTIN_COMIGESD,
23372 IX86_BUILTIN_COMINEQSD,
23373 IX86_BUILTIN_UCOMIEQSD,
23374 IX86_BUILTIN_UCOMILTSD,
23375 IX86_BUILTIN_UCOMILESD,
23376 IX86_BUILTIN_UCOMIGTSD,
23377 IX86_BUILTIN_UCOMIGESD,
23378 IX86_BUILTIN_UCOMINEQSD,
23380 IX86_BUILTIN_MAXPD,
23381 IX86_BUILTIN_MAXSD,
23382 IX86_BUILTIN_MINPD,
23383 IX86_BUILTIN_MINSD,
23385 IX86_BUILTIN_ANDPD,
23386 IX86_BUILTIN_ANDNPD,
23388 IX86_BUILTIN_XORPD,
23390 IX86_BUILTIN_SQRTPD,
23391 IX86_BUILTIN_SQRTSD,
23393 IX86_BUILTIN_UNPCKHPD,
23394 IX86_BUILTIN_UNPCKLPD,
23396 IX86_BUILTIN_SHUFPD,
23398 IX86_BUILTIN_LOADUPD,
23399 IX86_BUILTIN_STOREUPD,
23400 IX86_BUILTIN_MOVSD,
23402 IX86_BUILTIN_LOADHPD,
23403 IX86_BUILTIN_LOADLPD,
23405 IX86_BUILTIN_CVTDQ2PD,
23406 IX86_BUILTIN_CVTDQ2PS,
23408 IX86_BUILTIN_CVTPD2DQ,
23409 IX86_BUILTIN_CVTPD2PI,
23410 IX86_BUILTIN_CVTPD2PS,
23411 IX86_BUILTIN_CVTTPD2DQ,
23412 IX86_BUILTIN_CVTTPD2PI,
23414 IX86_BUILTIN_CVTPI2PD,
23415 IX86_BUILTIN_CVTSI2SD,
23416 IX86_BUILTIN_CVTSI642SD,
23418 IX86_BUILTIN_CVTSD2SI,
23419 IX86_BUILTIN_CVTSD2SI64,
23420 IX86_BUILTIN_CVTSD2SS,
23421 IX86_BUILTIN_CVTSS2SD,
23422 IX86_BUILTIN_CVTTSD2SI,
23423 IX86_BUILTIN_CVTTSD2SI64,
23425 IX86_BUILTIN_CVTPS2DQ,
23426 IX86_BUILTIN_CVTPS2PD,
23427 IX86_BUILTIN_CVTTPS2DQ,
23429 IX86_BUILTIN_MOVNTI,
23430 IX86_BUILTIN_MOVNTPD,
23431 IX86_BUILTIN_MOVNTDQ,
23433 IX86_BUILTIN_MOVQ128,
23436 IX86_BUILTIN_MASKMOVDQU,
23437 IX86_BUILTIN_MOVMSKPD,
23438 IX86_BUILTIN_PMOVMSKB128,
23440 IX86_BUILTIN_PACKSSWB128,
23441 IX86_BUILTIN_PACKSSDW128,
23442 IX86_BUILTIN_PACKUSWB128,
23444 IX86_BUILTIN_PADDB128,
23445 IX86_BUILTIN_PADDW128,
23446 IX86_BUILTIN_PADDD128,
23447 IX86_BUILTIN_PADDQ128,
23448 IX86_BUILTIN_PADDSB128,
23449 IX86_BUILTIN_PADDSW128,
23450 IX86_BUILTIN_PADDUSB128,
23451 IX86_BUILTIN_PADDUSW128,
23452 IX86_BUILTIN_PSUBB128,
23453 IX86_BUILTIN_PSUBW128,
23454 IX86_BUILTIN_PSUBD128,
23455 IX86_BUILTIN_PSUBQ128,
23456 IX86_BUILTIN_PSUBSB128,
23457 IX86_BUILTIN_PSUBSW128,
23458 IX86_BUILTIN_PSUBUSB128,
23459 IX86_BUILTIN_PSUBUSW128,
23461 IX86_BUILTIN_PAND128,
23462 IX86_BUILTIN_PANDN128,
23463 IX86_BUILTIN_POR128,
23464 IX86_BUILTIN_PXOR128,
23466 IX86_BUILTIN_PAVGB128,
23467 IX86_BUILTIN_PAVGW128,
23469 IX86_BUILTIN_PCMPEQB128,
23470 IX86_BUILTIN_PCMPEQW128,
23471 IX86_BUILTIN_PCMPEQD128,
23472 IX86_BUILTIN_PCMPGTB128,
23473 IX86_BUILTIN_PCMPGTW128,
23474 IX86_BUILTIN_PCMPGTD128,
23476 IX86_BUILTIN_PMADDWD128,
23478 IX86_BUILTIN_PMAXSW128,
23479 IX86_BUILTIN_PMAXUB128,
23480 IX86_BUILTIN_PMINSW128,
23481 IX86_BUILTIN_PMINUB128,
23483 IX86_BUILTIN_PMULUDQ,
23484 IX86_BUILTIN_PMULUDQ128,
23485 IX86_BUILTIN_PMULHUW128,
23486 IX86_BUILTIN_PMULHW128,
23487 IX86_BUILTIN_PMULLW128,
23489 IX86_BUILTIN_PSADBW128,
23490 IX86_BUILTIN_PSHUFHW,
23491 IX86_BUILTIN_PSHUFLW,
23492 IX86_BUILTIN_PSHUFD,
23494 IX86_BUILTIN_PSLLDQI128,
23495 IX86_BUILTIN_PSLLWI128,
23496 IX86_BUILTIN_PSLLDI128,
23497 IX86_BUILTIN_PSLLQI128,
23498 IX86_BUILTIN_PSRAWI128,
23499 IX86_BUILTIN_PSRADI128,
23500 IX86_BUILTIN_PSRLDQI128,
23501 IX86_BUILTIN_PSRLWI128,
23502 IX86_BUILTIN_PSRLDI128,
23503 IX86_BUILTIN_PSRLQI128,
23505 IX86_BUILTIN_PSLLDQ128,
23506 IX86_BUILTIN_PSLLW128,
23507 IX86_BUILTIN_PSLLD128,
23508 IX86_BUILTIN_PSLLQ128,
23509 IX86_BUILTIN_PSRAW128,
23510 IX86_BUILTIN_PSRAD128,
23511 IX86_BUILTIN_PSRLW128,
23512 IX86_BUILTIN_PSRLD128,
23513 IX86_BUILTIN_PSRLQ128,
23515 IX86_BUILTIN_PUNPCKHBW128,
23516 IX86_BUILTIN_PUNPCKHWD128,
23517 IX86_BUILTIN_PUNPCKHDQ128,
23518 IX86_BUILTIN_PUNPCKHQDQ128,
23519 IX86_BUILTIN_PUNPCKLBW128,
23520 IX86_BUILTIN_PUNPCKLWD128,
23521 IX86_BUILTIN_PUNPCKLDQ128,
23522 IX86_BUILTIN_PUNPCKLQDQ128,
23524 IX86_BUILTIN_CLFLUSH,
23525 IX86_BUILTIN_MFENCE,
23526 IX86_BUILTIN_LFENCE,
23527 IX86_BUILTIN_PAUSE,
23529 IX86_BUILTIN_BSRSI,
23530 IX86_BUILTIN_BSRDI,
23531 IX86_BUILTIN_RDPMC,
23532 IX86_BUILTIN_RDTSC,
23533 IX86_BUILTIN_RDTSCP,
23534 IX86_BUILTIN_ROLQI,
23535 IX86_BUILTIN_ROLHI,
23536 IX86_BUILTIN_RORQI,
23537 IX86_BUILTIN_RORHI,
23540 IX86_BUILTIN_ADDSUBPS,
23541 IX86_BUILTIN_HADDPS,
23542 IX86_BUILTIN_HSUBPS,
23543 IX86_BUILTIN_MOVSHDUP,
23544 IX86_BUILTIN_MOVSLDUP,
23545 IX86_BUILTIN_ADDSUBPD,
23546 IX86_BUILTIN_HADDPD,
23547 IX86_BUILTIN_HSUBPD,
23548 IX86_BUILTIN_LDDQU,
23550 IX86_BUILTIN_MONITOR,
23551 IX86_BUILTIN_MWAIT,
23554 IX86_BUILTIN_PHADDW,
23555 IX86_BUILTIN_PHADDD,
23556 IX86_BUILTIN_PHADDSW,
23557 IX86_BUILTIN_PHSUBW,
23558 IX86_BUILTIN_PHSUBD,
23559 IX86_BUILTIN_PHSUBSW,
23560 IX86_BUILTIN_PMADDUBSW,
23561 IX86_BUILTIN_PMULHRSW,
23562 IX86_BUILTIN_PSHUFB,
23563 IX86_BUILTIN_PSIGNB,
23564 IX86_BUILTIN_PSIGNW,
23565 IX86_BUILTIN_PSIGND,
23566 IX86_BUILTIN_PALIGNR,
23567 IX86_BUILTIN_PABSB,
23568 IX86_BUILTIN_PABSW,
23569 IX86_BUILTIN_PABSD,
23571 IX86_BUILTIN_PHADDW128,
23572 IX86_BUILTIN_PHADDD128,
23573 IX86_BUILTIN_PHADDSW128,
23574 IX86_BUILTIN_PHSUBW128,
23575 IX86_BUILTIN_PHSUBD128,
23576 IX86_BUILTIN_PHSUBSW128,
23577 IX86_BUILTIN_PMADDUBSW128,
23578 IX86_BUILTIN_PMULHRSW128,
23579 IX86_BUILTIN_PSHUFB128,
23580 IX86_BUILTIN_PSIGNB128,
23581 IX86_BUILTIN_PSIGNW128,
23582 IX86_BUILTIN_PSIGND128,
23583 IX86_BUILTIN_PALIGNR128,
23584 IX86_BUILTIN_PABSB128,
23585 IX86_BUILTIN_PABSW128,
23586 IX86_BUILTIN_PABSD128,
23588 /* AMDFAM10 - SSE4A New Instructions. */
23589 IX86_BUILTIN_MOVNTSD,
23590 IX86_BUILTIN_MOVNTSS,
23591 IX86_BUILTIN_EXTRQI,
23592 IX86_BUILTIN_EXTRQ,
23593 IX86_BUILTIN_INSERTQI,
23594 IX86_BUILTIN_INSERTQ,
23597 IX86_BUILTIN_BLENDPD,
23598 IX86_BUILTIN_BLENDPS,
23599 IX86_BUILTIN_BLENDVPD,
23600 IX86_BUILTIN_BLENDVPS,
23601 IX86_BUILTIN_PBLENDVB128,
23602 IX86_BUILTIN_PBLENDW128,
23607 IX86_BUILTIN_INSERTPS128,
23609 IX86_BUILTIN_MOVNTDQA,
23610 IX86_BUILTIN_MPSADBW128,
23611 IX86_BUILTIN_PACKUSDW128,
23612 IX86_BUILTIN_PCMPEQQ,
23613 IX86_BUILTIN_PHMINPOSUW128,
23615 IX86_BUILTIN_PMAXSB128,
23616 IX86_BUILTIN_PMAXSD128,
23617 IX86_BUILTIN_PMAXUD128,
23618 IX86_BUILTIN_PMAXUW128,
23620 IX86_BUILTIN_PMINSB128,
23621 IX86_BUILTIN_PMINSD128,
23622 IX86_BUILTIN_PMINUD128,
23623 IX86_BUILTIN_PMINUW128,
23625 IX86_BUILTIN_PMOVSXBW128,
23626 IX86_BUILTIN_PMOVSXBD128,
23627 IX86_BUILTIN_PMOVSXBQ128,
23628 IX86_BUILTIN_PMOVSXWD128,
23629 IX86_BUILTIN_PMOVSXWQ128,
23630 IX86_BUILTIN_PMOVSXDQ128,
23632 IX86_BUILTIN_PMOVZXBW128,
23633 IX86_BUILTIN_PMOVZXBD128,
23634 IX86_BUILTIN_PMOVZXBQ128,
23635 IX86_BUILTIN_PMOVZXWD128,
23636 IX86_BUILTIN_PMOVZXWQ128,
23637 IX86_BUILTIN_PMOVZXDQ128,
23639 IX86_BUILTIN_PMULDQ128,
23640 IX86_BUILTIN_PMULLD128,
23642 IX86_BUILTIN_ROUNDPD,
23643 IX86_BUILTIN_ROUNDPS,
23644 IX86_BUILTIN_ROUNDSD,
23645 IX86_BUILTIN_ROUNDSS,
23647 IX86_BUILTIN_FLOORPD,
23648 IX86_BUILTIN_CEILPD,
23649 IX86_BUILTIN_TRUNCPD,
23650 IX86_BUILTIN_RINTPD,
23651 IX86_BUILTIN_FLOORPS,
23652 IX86_BUILTIN_CEILPS,
23653 IX86_BUILTIN_TRUNCPS,
23654 IX86_BUILTIN_RINTPS,
23656 IX86_BUILTIN_PTESTZ,
23657 IX86_BUILTIN_PTESTC,
23658 IX86_BUILTIN_PTESTNZC,
23660 IX86_BUILTIN_VEC_INIT_V2SI,
23661 IX86_BUILTIN_VEC_INIT_V4HI,
23662 IX86_BUILTIN_VEC_INIT_V8QI,
23663 IX86_BUILTIN_VEC_EXT_V2DF,
23664 IX86_BUILTIN_VEC_EXT_V2DI,
23665 IX86_BUILTIN_VEC_EXT_V4SF,
23666 IX86_BUILTIN_VEC_EXT_V4SI,
23667 IX86_BUILTIN_VEC_EXT_V8HI,
23668 IX86_BUILTIN_VEC_EXT_V2SI,
23669 IX86_BUILTIN_VEC_EXT_V4HI,
23670 IX86_BUILTIN_VEC_EXT_V16QI,
23671 IX86_BUILTIN_VEC_SET_V2DI,
23672 IX86_BUILTIN_VEC_SET_V4SF,
23673 IX86_BUILTIN_VEC_SET_V4SI,
23674 IX86_BUILTIN_VEC_SET_V8HI,
23675 IX86_BUILTIN_VEC_SET_V4HI,
23676 IX86_BUILTIN_VEC_SET_V16QI,
23678 IX86_BUILTIN_VEC_PACK_SFIX,
23681 IX86_BUILTIN_CRC32QI,
23682 IX86_BUILTIN_CRC32HI,
23683 IX86_BUILTIN_CRC32SI,
23684 IX86_BUILTIN_CRC32DI,
23686 IX86_BUILTIN_PCMPESTRI128,
23687 IX86_BUILTIN_PCMPESTRM128,
23688 IX86_BUILTIN_PCMPESTRA128,
23689 IX86_BUILTIN_PCMPESTRC128,
23690 IX86_BUILTIN_PCMPESTRO128,
23691 IX86_BUILTIN_PCMPESTRS128,
23692 IX86_BUILTIN_PCMPESTRZ128,
23693 IX86_BUILTIN_PCMPISTRI128,
23694 IX86_BUILTIN_PCMPISTRM128,
23695 IX86_BUILTIN_PCMPISTRA128,
23696 IX86_BUILTIN_PCMPISTRC128,
23697 IX86_BUILTIN_PCMPISTRO128,
23698 IX86_BUILTIN_PCMPISTRS128,
23699 IX86_BUILTIN_PCMPISTRZ128,
23701 IX86_BUILTIN_PCMPGTQ,
23703 /* AES instructions */
23704 IX86_BUILTIN_AESENC128,
23705 IX86_BUILTIN_AESENCLAST128,
23706 IX86_BUILTIN_AESDEC128,
23707 IX86_BUILTIN_AESDECLAST128,
23708 IX86_BUILTIN_AESIMC128,
23709 IX86_BUILTIN_AESKEYGENASSIST128,
23711 /* PCLMUL instruction */
23712 IX86_BUILTIN_PCLMULQDQ128,
23715 IX86_BUILTIN_ADDPD256,
23716 IX86_BUILTIN_ADDPS256,
23717 IX86_BUILTIN_ADDSUBPD256,
23718 IX86_BUILTIN_ADDSUBPS256,
23719 IX86_BUILTIN_ANDPD256,
23720 IX86_BUILTIN_ANDPS256,
23721 IX86_BUILTIN_ANDNPD256,
23722 IX86_BUILTIN_ANDNPS256,
23723 IX86_BUILTIN_BLENDPD256,
23724 IX86_BUILTIN_BLENDPS256,
23725 IX86_BUILTIN_BLENDVPD256,
23726 IX86_BUILTIN_BLENDVPS256,
23727 IX86_BUILTIN_DIVPD256,
23728 IX86_BUILTIN_DIVPS256,
23729 IX86_BUILTIN_DPPS256,
23730 IX86_BUILTIN_HADDPD256,
23731 IX86_BUILTIN_HADDPS256,
23732 IX86_BUILTIN_HSUBPD256,
23733 IX86_BUILTIN_HSUBPS256,
23734 IX86_BUILTIN_MAXPD256,
23735 IX86_BUILTIN_MAXPS256,
23736 IX86_BUILTIN_MINPD256,
23737 IX86_BUILTIN_MINPS256,
23738 IX86_BUILTIN_MULPD256,
23739 IX86_BUILTIN_MULPS256,
23740 IX86_BUILTIN_ORPD256,
23741 IX86_BUILTIN_ORPS256,
23742 IX86_BUILTIN_SHUFPD256,
23743 IX86_BUILTIN_SHUFPS256,
23744 IX86_BUILTIN_SUBPD256,
23745 IX86_BUILTIN_SUBPS256,
23746 IX86_BUILTIN_XORPD256,
23747 IX86_BUILTIN_XORPS256,
23748 IX86_BUILTIN_CMPSD,
23749 IX86_BUILTIN_CMPSS,
23750 IX86_BUILTIN_CMPPD,
23751 IX86_BUILTIN_CMPPS,
23752 IX86_BUILTIN_CMPPD256,
23753 IX86_BUILTIN_CMPPS256,
23754 IX86_BUILTIN_CVTDQ2PD256,
23755 IX86_BUILTIN_CVTDQ2PS256,
23756 IX86_BUILTIN_CVTPD2PS256,
23757 IX86_BUILTIN_CVTPS2DQ256,
23758 IX86_BUILTIN_CVTPS2PD256,
23759 IX86_BUILTIN_CVTTPD2DQ256,
23760 IX86_BUILTIN_CVTPD2DQ256,
23761 IX86_BUILTIN_CVTTPS2DQ256,
23762 IX86_BUILTIN_EXTRACTF128PD256,
23763 IX86_BUILTIN_EXTRACTF128PS256,
23764 IX86_BUILTIN_EXTRACTF128SI256,
23765 IX86_BUILTIN_VZEROALL,
23766 IX86_BUILTIN_VZEROUPPER,
23767 IX86_BUILTIN_VPERMILVARPD,
23768 IX86_BUILTIN_VPERMILVARPS,
23769 IX86_BUILTIN_VPERMILVARPD256,
23770 IX86_BUILTIN_VPERMILVARPS256,
23771 IX86_BUILTIN_VPERMILPD,
23772 IX86_BUILTIN_VPERMILPS,
23773 IX86_BUILTIN_VPERMILPD256,
23774 IX86_BUILTIN_VPERMILPS256,
23775 IX86_BUILTIN_VPERMIL2PD,
23776 IX86_BUILTIN_VPERMIL2PS,
23777 IX86_BUILTIN_VPERMIL2PD256,
23778 IX86_BUILTIN_VPERMIL2PS256,
23779 IX86_BUILTIN_VPERM2F128PD256,
23780 IX86_BUILTIN_VPERM2F128PS256,
23781 IX86_BUILTIN_VPERM2F128SI256,
23782 IX86_BUILTIN_VBROADCASTSS,
23783 IX86_BUILTIN_VBROADCASTSD256,
23784 IX86_BUILTIN_VBROADCASTSS256,
23785 IX86_BUILTIN_VBROADCASTPD256,
23786 IX86_BUILTIN_VBROADCASTPS256,
23787 IX86_BUILTIN_VINSERTF128PD256,
23788 IX86_BUILTIN_VINSERTF128PS256,
23789 IX86_BUILTIN_VINSERTF128SI256,
23790 IX86_BUILTIN_LOADUPD256,
23791 IX86_BUILTIN_LOADUPS256,
23792 IX86_BUILTIN_STOREUPD256,
23793 IX86_BUILTIN_STOREUPS256,
23794 IX86_BUILTIN_LDDQU256,
23795 IX86_BUILTIN_MOVNTDQ256,
23796 IX86_BUILTIN_MOVNTPD256,
23797 IX86_BUILTIN_MOVNTPS256,
23798 IX86_BUILTIN_LOADDQU256,
23799 IX86_BUILTIN_STOREDQU256,
23800 IX86_BUILTIN_MASKLOADPD,
23801 IX86_BUILTIN_MASKLOADPS,
23802 IX86_BUILTIN_MASKSTOREPD,
23803 IX86_BUILTIN_MASKSTOREPS,
23804 IX86_BUILTIN_MASKLOADPD256,
23805 IX86_BUILTIN_MASKLOADPS256,
23806 IX86_BUILTIN_MASKSTOREPD256,
23807 IX86_BUILTIN_MASKSTOREPS256,
23808 IX86_BUILTIN_MOVSHDUP256,
23809 IX86_BUILTIN_MOVSLDUP256,
23810 IX86_BUILTIN_MOVDDUP256,
23812 IX86_BUILTIN_SQRTPD256,
23813 IX86_BUILTIN_SQRTPS256,
23814 IX86_BUILTIN_SQRTPS_NR256,
23815 IX86_BUILTIN_RSQRTPS256,
23816 IX86_BUILTIN_RSQRTPS_NR256,
23818 IX86_BUILTIN_RCPPS256,
23820 IX86_BUILTIN_ROUNDPD256,
23821 IX86_BUILTIN_ROUNDPS256,
23823 IX86_BUILTIN_FLOORPD256,
23824 IX86_BUILTIN_CEILPD256,
23825 IX86_BUILTIN_TRUNCPD256,
23826 IX86_BUILTIN_RINTPD256,
23827 IX86_BUILTIN_FLOORPS256,
23828 IX86_BUILTIN_CEILPS256,
23829 IX86_BUILTIN_TRUNCPS256,
23830 IX86_BUILTIN_RINTPS256,
23832 IX86_BUILTIN_UNPCKHPD256,
23833 IX86_BUILTIN_UNPCKLPD256,
23834 IX86_BUILTIN_UNPCKHPS256,
23835 IX86_BUILTIN_UNPCKLPS256,
23837 IX86_BUILTIN_SI256_SI,
23838 IX86_BUILTIN_PS256_PS,
23839 IX86_BUILTIN_PD256_PD,
23840 IX86_BUILTIN_SI_SI256,
23841 IX86_BUILTIN_PS_PS256,
23842 IX86_BUILTIN_PD_PD256,
23844 IX86_BUILTIN_VTESTZPD,
23845 IX86_BUILTIN_VTESTCPD,
23846 IX86_BUILTIN_VTESTNZCPD,
23847 IX86_BUILTIN_VTESTZPS,
23848 IX86_BUILTIN_VTESTCPS,
23849 IX86_BUILTIN_VTESTNZCPS,
23850 IX86_BUILTIN_VTESTZPD256,
23851 IX86_BUILTIN_VTESTCPD256,
23852 IX86_BUILTIN_VTESTNZCPD256,
23853 IX86_BUILTIN_VTESTZPS256,
23854 IX86_BUILTIN_VTESTCPS256,
23855 IX86_BUILTIN_VTESTNZCPS256,
23856 IX86_BUILTIN_PTESTZ256,
23857 IX86_BUILTIN_PTESTC256,
23858 IX86_BUILTIN_PTESTNZC256,
23860 IX86_BUILTIN_MOVMSKPD256,
23861 IX86_BUILTIN_MOVMSKPS256,
23863 /* TFmode support builtins. */
23865 IX86_BUILTIN_HUGE_VALQ,
23866 IX86_BUILTIN_FABSQ,
23867 IX86_BUILTIN_COPYSIGNQ,
23869 /* Vectorizer support builtins. */
23870 IX86_BUILTIN_CPYSGNPS,
23871 IX86_BUILTIN_CPYSGNPD,
23872 IX86_BUILTIN_CPYSGNPS256,
23873 IX86_BUILTIN_CPYSGNPD256,
23875 IX86_BUILTIN_CVTUDQ2PS,
23877 IX86_BUILTIN_VEC_PERM_V2DF,
23878 IX86_BUILTIN_VEC_PERM_V4SF,
23879 IX86_BUILTIN_VEC_PERM_V2DI,
23880 IX86_BUILTIN_VEC_PERM_V4SI,
23881 IX86_BUILTIN_VEC_PERM_V8HI,
23882 IX86_BUILTIN_VEC_PERM_V16QI,
23883 IX86_BUILTIN_VEC_PERM_V2DI_U,
23884 IX86_BUILTIN_VEC_PERM_V4SI_U,
23885 IX86_BUILTIN_VEC_PERM_V8HI_U,
23886 IX86_BUILTIN_VEC_PERM_V16QI_U,
23887 IX86_BUILTIN_VEC_PERM_V4DF,
23888 IX86_BUILTIN_VEC_PERM_V8SF,
23890 /* FMA4 and XOP instructions. */
23891 IX86_BUILTIN_VFMADDSS,
23892 IX86_BUILTIN_VFMADDSD,
23893 IX86_BUILTIN_VFMADDPS,
23894 IX86_BUILTIN_VFMADDPD,
23895 IX86_BUILTIN_VFMADDPS256,
23896 IX86_BUILTIN_VFMADDPD256,
23897 IX86_BUILTIN_VFMADDSUBPS,
23898 IX86_BUILTIN_VFMADDSUBPD,
23899 IX86_BUILTIN_VFMADDSUBPS256,
23900 IX86_BUILTIN_VFMADDSUBPD256,
23902 IX86_BUILTIN_VPCMOV,
23903 IX86_BUILTIN_VPCMOV_V2DI,
23904 IX86_BUILTIN_VPCMOV_V4SI,
23905 IX86_BUILTIN_VPCMOV_V8HI,
23906 IX86_BUILTIN_VPCMOV_V16QI,
23907 IX86_BUILTIN_VPCMOV_V4SF,
23908 IX86_BUILTIN_VPCMOV_V2DF,
23909 IX86_BUILTIN_VPCMOV256,
23910 IX86_BUILTIN_VPCMOV_V4DI256,
23911 IX86_BUILTIN_VPCMOV_V8SI256,
23912 IX86_BUILTIN_VPCMOV_V16HI256,
23913 IX86_BUILTIN_VPCMOV_V32QI256,
23914 IX86_BUILTIN_VPCMOV_V8SF256,
23915 IX86_BUILTIN_VPCMOV_V4DF256,
23917 IX86_BUILTIN_VPPERM,
23919 IX86_BUILTIN_VPMACSSWW,
23920 IX86_BUILTIN_VPMACSWW,
23921 IX86_BUILTIN_VPMACSSWD,
23922 IX86_BUILTIN_VPMACSWD,
23923 IX86_BUILTIN_VPMACSSDD,
23924 IX86_BUILTIN_VPMACSDD,
23925 IX86_BUILTIN_VPMACSSDQL,
23926 IX86_BUILTIN_VPMACSSDQH,
23927 IX86_BUILTIN_VPMACSDQL,
23928 IX86_BUILTIN_VPMACSDQH,
23929 IX86_BUILTIN_VPMADCSSWD,
23930 IX86_BUILTIN_VPMADCSWD,
23932 IX86_BUILTIN_VPHADDBW,
23933 IX86_BUILTIN_VPHADDBD,
23934 IX86_BUILTIN_VPHADDBQ,
23935 IX86_BUILTIN_VPHADDWD,
23936 IX86_BUILTIN_VPHADDWQ,
23937 IX86_BUILTIN_VPHADDDQ,
23938 IX86_BUILTIN_VPHADDUBW,
23939 IX86_BUILTIN_VPHADDUBD,
23940 IX86_BUILTIN_VPHADDUBQ,
23941 IX86_BUILTIN_VPHADDUWD,
23942 IX86_BUILTIN_VPHADDUWQ,
23943 IX86_BUILTIN_VPHADDUDQ,
23944 IX86_BUILTIN_VPHSUBBW,
23945 IX86_BUILTIN_VPHSUBWD,
23946 IX86_BUILTIN_VPHSUBDQ,
23948 IX86_BUILTIN_VPROTB,
23949 IX86_BUILTIN_VPROTW,
23950 IX86_BUILTIN_VPROTD,
23951 IX86_BUILTIN_VPROTQ,
23952 IX86_BUILTIN_VPROTB_IMM,
23953 IX86_BUILTIN_VPROTW_IMM,
23954 IX86_BUILTIN_VPROTD_IMM,
23955 IX86_BUILTIN_VPROTQ_IMM,
23957 IX86_BUILTIN_VPSHLB,
23958 IX86_BUILTIN_VPSHLW,
23959 IX86_BUILTIN_VPSHLD,
23960 IX86_BUILTIN_VPSHLQ,
23961 IX86_BUILTIN_VPSHAB,
23962 IX86_BUILTIN_VPSHAW,
23963 IX86_BUILTIN_VPSHAD,
23964 IX86_BUILTIN_VPSHAQ,
23966 IX86_BUILTIN_VFRCZSS,
23967 IX86_BUILTIN_VFRCZSD,
23968 IX86_BUILTIN_VFRCZPS,
23969 IX86_BUILTIN_VFRCZPD,
23970 IX86_BUILTIN_VFRCZPS256,
23971 IX86_BUILTIN_VFRCZPD256,
23973 IX86_BUILTIN_VPCOMEQUB,
23974 IX86_BUILTIN_VPCOMNEUB,
23975 IX86_BUILTIN_VPCOMLTUB,
23976 IX86_BUILTIN_VPCOMLEUB,
23977 IX86_BUILTIN_VPCOMGTUB,
23978 IX86_BUILTIN_VPCOMGEUB,
23979 IX86_BUILTIN_VPCOMFALSEUB,
23980 IX86_BUILTIN_VPCOMTRUEUB,
23982 IX86_BUILTIN_VPCOMEQUW,
23983 IX86_BUILTIN_VPCOMNEUW,
23984 IX86_BUILTIN_VPCOMLTUW,
23985 IX86_BUILTIN_VPCOMLEUW,
23986 IX86_BUILTIN_VPCOMGTUW,
23987 IX86_BUILTIN_VPCOMGEUW,
23988 IX86_BUILTIN_VPCOMFALSEUW,
23989 IX86_BUILTIN_VPCOMTRUEUW,
23991 IX86_BUILTIN_VPCOMEQUD,
23992 IX86_BUILTIN_VPCOMNEUD,
23993 IX86_BUILTIN_VPCOMLTUD,
23994 IX86_BUILTIN_VPCOMLEUD,
23995 IX86_BUILTIN_VPCOMGTUD,
23996 IX86_BUILTIN_VPCOMGEUD,
23997 IX86_BUILTIN_VPCOMFALSEUD,
23998 IX86_BUILTIN_VPCOMTRUEUD,
24000 IX86_BUILTIN_VPCOMEQUQ,
24001 IX86_BUILTIN_VPCOMNEUQ,
24002 IX86_BUILTIN_VPCOMLTUQ,
24003 IX86_BUILTIN_VPCOMLEUQ,
24004 IX86_BUILTIN_VPCOMGTUQ,
24005 IX86_BUILTIN_VPCOMGEUQ,
24006 IX86_BUILTIN_VPCOMFALSEUQ,
24007 IX86_BUILTIN_VPCOMTRUEUQ,
24009 IX86_BUILTIN_VPCOMEQB,
24010 IX86_BUILTIN_VPCOMNEB,
24011 IX86_BUILTIN_VPCOMLTB,
24012 IX86_BUILTIN_VPCOMLEB,
24013 IX86_BUILTIN_VPCOMGTB,
24014 IX86_BUILTIN_VPCOMGEB,
24015 IX86_BUILTIN_VPCOMFALSEB,
24016 IX86_BUILTIN_VPCOMTRUEB,
24018 IX86_BUILTIN_VPCOMEQW,
24019 IX86_BUILTIN_VPCOMNEW,
24020 IX86_BUILTIN_VPCOMLTW,
24021 IX86_BUILTIN_VPCOMLEW,
24022 IX86_BUILTIN_VPCOMGTW,
24023 IX86_BUILTIN_VPCOMGEW,
24024 IX86_BUILTIN_VPCOMFALSEW,
24025 IX86_BUILTIN_VPCOMTRUEW,
24027 IX86_BUILTIN_VPCOMEQD,
24028 IX86_BUILTIN_VPCOMNED,
24029 IX86_BUILTIN_VPCOMLTD,
24030 IX86_BUILTIN_VPCOMLED,
24031 IX86_BUILTIN_VPCOMGTD,
24032 IX86_BUILTIN_VPCOMGED,
24033 IX86_BUILTIN_VPCOMFALSED,
24034 IX86_BUILTIN_VPCOMTRUED,
24036 IX86_BUILTIN_VPCOMEQQ,
24037 IX86_BUILTIN_VPCOMNEQ,
24038 IX86_BUILTIN_VPCOMLTQ,
24039 IX86_BUILTIN_VPCOMLEQ,
24040 IX86_BUILTIN_VPCOMGTQ,
24041 IX86_BUILTIN_VPCOMGEQ,
24042 IX86_BUILTIN_VPCOMFALSEQ,
24043 IX86_BUILTIN_VPCOMTRUEQ,
24045 /* LWP instructions. */
24046 IX86_BUILTIN_LLWPCB,
24047 IX86_BUILTIN_SLWPCB,
24048 IX86_BUILTIN_LWPVAL32,
24049 IX86_BUILTIN_LWPVAL64,
24050 IX86_BUILTIN_LWPINS32,
24051 IX86_BUILTIN_LWPINS64,
24055 /* BMI instructions. */
24056 IX86_BUILTIN_BEXTR32,
24057 IX86_BUILTIN_BEXTR64,
24060 /* TBM instructions. */
24061 IX86_BUILTIN_BEXTRI32,
24062 IX86_BUILTIN_BEXTRI64,
24065 /* FSGSBASE instructions. */
24066 IX86_BUILTIN_RDFSBASE32,
24067 IX86_BUILTIN_RDFSBASE64,
24068 IX86_BUILTIN_RDGSBASE32,
24069 IX86_BUILTIN_RDGSBASE64,
24070 IX86_BUILTIN_WRFSBASE32,
24071 IX86_BUILTIN_WRFSBASE64,
24072 IX86_BUILTIN_WRGSBASE32,
24073 IX86_BUILTIN_WRGSBASE64,
24075 /* RDRND instructions. */
24076 IX86_BUILTIN_RDRAND16_STEP,
24077 IX86_BUILTIN_RDRAND32_STEP,
24078 IX86_BUILTIN_RDRAND64_STEP,
24080 /* F16C instructions. */
24081 IX86_BUILTIN_CVTPH2PS,
24082 IX86_BUILTIN_CVTPH2PS256,
24083 IX86_BUILTIN_CVTPS2PH,
24084 IX86_BUILTIN_CVTPS2PH256,
24086 /* CFString built-in for darwin */
24087 IX86_BUILTIN_CFSTRING,
24092 /* Table for the ix86 builtin decls. */
24093 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24095 /* Table of all of the builtin functions that are possible with different ISA's
24096 but are waiting to be built until a function is declared to use that
24097 ISA. */
24098 struct builtin_isa {
24099 const char *name; /* function name */
24100 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24101 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
24102 bool const_p; /* true if the declaration is constant */
24103 bool set_and_not_built_p;
24106 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24109 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24110 of which isa_flags to use in the ix86_builtins_isa array. Stores the
24111 function decl in the ix86_builtins array. Returns the function decl or
24112 NULL_TREE, if the builtin was not added.
24114 If the front end has a special hook for builtin functions, delay adding
24115 builtin functions that aren't in the current ISA until the ISA is changed
24116 with function specific optimization. Doing so can save about 300K for the
24117 default compiler. When the builtin is expanded, check at that time whether
24118 it is valid.
24120 If the front end doesn't have a special hook, record all builtins, even if
24121 they aren't in the current ISA, in case the user uses
24122 function specific options for a different ISA, so that we don't get scope
24123 errors if a builtin is added in the middle of a function scope. */
24126 def_builtin (HOST_WIDE_INT mask, const char *name,
24127 enum ix86_builtin_func_type tcode,
24128 enum ix86_builtins code)
24130 tree decl = NULL_TREE;
24132 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24134 ix86_builtins_isa[(int) code].isa = mask;
24136 mask &= ~OPTION_MASK_ISA_64BIT;
24137 if (mask == 0
24138 || (mask & ix86_isa_flags) != 0
24139 || (lang_hooks.builtin_function
24140 == lang_hooks.builtin_function_ext_scope))
24143 tree type = ix86_get_builtin_func_type (tcode);
24144 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24146 ix86_builtins[(int) code] = decl;
24147 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24151 ix86_builtins[(int) code] = NULL_TREE;
24152 ix86_builtins_isa[(int) code].tcode = tcode;
24153 ix86_builtins_isa[(int) code].name = name;
24154 ix86_builtins_isa[(int) code].const_p = false;
24155 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
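/* Illustrative sketch, not from the sources: how the deferral above
   plays out.  The builtin name, type code and enumerator here are
   hypothetical placeholders, not real GCC entries.

       def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                    INT_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);

   If SSE2 is off and the front end defers, only ix86_builtins_isa is
   filled in; a later switch to an SSE2-enabled ISA makes
   ix86_add_new_builtins materialize the decl at extension scope.  */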
24162 /* Like def_builtin, but also marks the function decl "const". */
24165 def_builtin_const (HOST_WIDE_INT mask, const char *name,
24166 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24168 tree decl = def_builtin (mask, name, tcode, code);
24169 if (decl)
24170 TREE_READONLY (decl) = 1;
24171 else
24172 ix86_builtins_isa[(int) code].const_p = true;
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
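/* Note (added for exposition; an assumption about the callers): this is
   expected to run when ix86_isa_flags gains new bits, e.g. after a
   target("...") attribute or pragma enables an ISA mid translation unit,
   so that the builtins deferred by def_builtin become visible.  */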
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
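/* For example, __builtin_ia32_cmpgtps in bdesc_args below is emitted via the
   LT pattern with its operands swapped; the *_SWAP function types mark such
   entries.  */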
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
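
  /* SSE2 MMX */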
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
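
  /* SSE3 */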
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
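
  /* SSSE3 */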
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
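
  /* SSSE3 palignr */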
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
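
  /* SSE4.1 */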
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
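
  /* SSE4.1 rounding and ptest */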
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
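
  /* SSE4.2 */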
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
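
  /* SSE4A */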
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
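
  /* AES */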
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
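
  /* PCLMUL */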
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
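
  /* AVX */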
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
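
  /* BMI */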
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
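
  /* TBM */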
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
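
  /* F16C */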
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
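/* The MULTI_ARG_* macros below abbreviate the function type of each
   multi-argument builtin: the digit is the operand count, and the rest
   of the suffix encodes the operand vector modes, with _IMM, _CMP and
   _TF variants for the immediate-operand, comparison and pcom
   true/false forms.  */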
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
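
/* Descriptions of the FMA4 and XOP multi-argument builtins.  Each entry
   records the ISA mask that must be enabled, the insn code, the builtin
   name and enum value, the comparison code where one applies, and the
   MULTI_ARG_* function type of the builtin.  */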
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25289 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25290 in the current target ISA, to allow the user to compile particular modules
25291 with different target-specific options that differ from the command-line options.  */
25294 ix86_init_mmx_sse_builtins (void)
25296 const struct builtin_description * d;
25297 enum ix86_builtin_func_type ftype;
25300 /* Add all special builtins with a variable number of operands.  */
25301 for (i = 0, d = bdesc_special_args;
25302 i < ARRAY_SIZE (bdesc_special_args);
25308 ftype = (enum ix86_builtin_func_type) d->flag;
25309 def_builtin (d->mask, d->name, ftype, d->code);
25312 /* Add all builtins with a variable number of operands.  */
25313 for (i = 0, d = bdesc_args;
25314 i < ARRAY_SIZE (bdesc_args);
25320 ftype = (enum ix86_builtin_func_type) d->flag;
25321 def_builtin_const (d->mask, d->name, ftype, d->code);
25324 /* pcmpestr[im] insns. */
25325 for (i = 0, d = bdesc_pcmpestr;
25326 i < ARRAY_SIZE (bdesc_pcmpestr);
25329 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25330 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25332 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25333 def_builtin_const (d->mask, d->name, ftype, d->code);
25336 /* pcmpistr[im] insns. */
25337 for (i = 0, d = bdesc_pcmpistr;
25338 i < ARRAY_SIZE (bdesc_pcmpistr);
25341 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25342 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25344 ftype = INT_FTYPE_V16QI_V16QI_INT;
25345 def_builtin_const (d->mask, d->name, ftype, d->code);
25348 /* comi/ucomi insns. */
25349 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25351 if (d->mask == OPTION_MASK_ISA_SSE2)
25352 ftype = INT_FTYPE_V2DF_V2DF;
25354 ftype = INT_FTYPE_V4SF_V4SF;
25355 def_builtin_const (d->mask, d->name, ftype, d->code);
25359 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25360 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25361 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25362 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
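/* Illustrative sketch, not compiler code: in a user translation unit
   compiled with -msse these are reachable directly (xmmintrin.h wraps
   them as _mm_getcsr/_mm_setcsr).  For example, setting the MXCSR
   flush-to-zero bit (bit 15):

       unsigned int csr = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (csr | 0x8000);  */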
25364 /* SSE or 3DNow!A */
25365 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25366 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25367 IX86_BUILTIN_MASKMOVQ);
25370 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25371 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25373 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25374 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25375 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25376 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25379 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25380 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25381 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25382 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
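/* Illustrative sketch, not compiler code: the SSE3 pair above matches
   the MONITOR/MWAIT instructions, which are normally usable only in
   privileged code.  A kernel-style idle wait might look like:

       static void
       wait_for_flag (volatile int *flag)
       {
         while (!*flag)
           {
             __builtin_ia32_monitor ((const void *) flag, 0, 0);
             if (!*flag)
               __builtin_ia32_mwait (0, 0);
           }
       }  */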
25385 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25386 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25387 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25388 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25389 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25390 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25391 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25392 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25393 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25394 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25395 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25396 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25399 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25400 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25403 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25404 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25405 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25406 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25407 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25408 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25409 IX86_BUILTIN_RDRAND64_STEP);
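/* Illustrative sketch, not compiler code: the *_step builtins store a
   random value through their pointer argument and return nonzero iff
   the hardware had entropy available (see the CF-based expansion of
   IX86_BUILTIN_RDRAND*_STEP below), so callers retry.  Compiled with
   -mrdrnd:

       static int
       get_random_u32 (unsigned int *out)
       {
         int tries;
         for (tries = 0; tries < 10; tries++)
           if (__builtin_ia32_rdrand32_step (out))
             return 1;
         return 0;
       }  */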
25411 /* MMX access to the vec_init patterns. */
25412 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25413 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25415 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25416 V4HI_FTYPE_HI_HI_HI_HI,
25417 IX86_BUILTIN_VEC_INIT_V4HI);
25419 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25420 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25421 IX86_BUILTIN_VEC_INIT_V8QI);
25423 /* Access to the vec_extract patterns. */
25424 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25425 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25426 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25427 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25428 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25429 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25430 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25431 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25432 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25433 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25435 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25436 "__builtin_ia32_vec_ext_v4hi",
25437 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25439 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25440 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25442 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25443 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
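/* Illustrative sketch, not compiler code: the vec_ext builtins take
   the vector and a selector that must be an integer constant
   (enforced by get_element_number below):

       typedef short __v8hi __attribute__ ((vector_size (16)));
       short third (__v8hi v) { return __builtin_ia32_vec_ext_v8hi (v, 2); }  */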
25445 /* Access to the vec_set patterns. */
25446 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25447 "__builtin_ia32_vec_set_v2di",
25448 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25450 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25451 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25453 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25454 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25456 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25457 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25459 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25460 "__builtin_ia32_vec_set_v4hi",
25461 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25463 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25464 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
25466 /* Add the FMA4/XOP multi-arg builtin instructions.  */
25467 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25472 ftype = (enum ix86_builtin_func_type) d->flag;
25473 def_builtin_const (d->mask, d->name, ftype, d->code);
25477 /* Internal method for ix86_init_builtins. */
25480 ix86_init_builtins_va_builtins_abi (void)
25482 tree ms_va_ref, sysv_va_ref;
25483 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25484 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25485 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25486 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25490 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25491 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25492 ms_va_ref = build_reference_type (ms_va_list_type_node);
25494 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25497 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25498 fnvoid_va_start_ms =
25499 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25500 fnvoid_va_end_sysv =
25501 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25502 fnvoid_va_start_sysv =
25503 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25505 fnvoid_va_copy_ms =
25506 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25508 fnvoid_va_copy_sysv =
25509 build_function_type_list (void_type_node, sysv_va_ref,
25510 sysv_va_ref, NULL_TREE);
25512 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25513 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25514 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25515 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25516 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25517 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25518 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25519 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25520 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25521 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25522 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25523 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25527 ix86_init_builtin_types (void)
25529 tree float128_type_node, float80_type_node;
25531 /* The __float80 type. */
25532 float80_type_node = long_double_type_node;
25533 if (TYPE_MODE (float80_type_node) != XFmode)
25535 /* long double does not have XFmode here, so build a separate 80-bit type.  */
25536 float80_type_node = make_node (REAL_TYPE);
25538 TYPE_PRECISION (float80_type_node) = 80;
25539 layout_type (float80_type_node);
25541 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25543 /* The __float128 type. */
25544 float128_type_node = make_node (REAL_TYPE);
25545 TYPE_PRECISION (float128_type_node) = 128;
25546 layout_type (float128_type_node);
25547 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25549 /* This macro is built by i386-builtin-types.awk. */
25550 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25554 ix86_init_builtins (void)
25558 ix86_init_builtin_types ();
25560 /* TFmode support builtins. */
25561 def_builtin_const (0, "__builtin_infq",
25562 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25563 def_builtin_const (0, "__builtin_huge_valq",
25564 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25566 /* We will expand them to a normal call if SSE2 isn't available, since
25567 they are used by libgcc.  */
25568 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25569 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25570 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25571 TREE_READONLY (t) = 1;
25572 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25574 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25575 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25576 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25577 TREE_READONLY (t) = 1;
25578 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25580 ix86_init_mmx_sse_builtins ();
25583 ix86_init_builtins_va_builtins_abi ();
25585 #ifdef SUBTARGET_INIT_BUILTINS
25586 SUBTARGET_INIT_BUILTINS;
25590 /* Return the ix86 builtin for CODE. */
25593 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25595 if (code >= IX86_BUILTIN_MAX)
25596 return error_mark_node;
25598 return ix86_builtins[code];
25601 /* Errors in the source file can cause expand_expr to return const0_rtx
25602 where we expect a vector. To avoid crashing, use one of the vector
25603 clear instructions. */
25605 safe_vector_operand (rtx x, enum machine_mode mode)
25607 if (x == const0_rtx)
25608 x = CONST0_RTX (mode);
25612 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25615 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25618 tree arg0 = CALL_EXPR_ARG (exp, 0);
25619 tree arg1 = CALL_EXPR_ARG (exp, 1);
25620 rtx op0 = expand_normal (arg0);
25621 rtx op1 = expand_normal (arg1);
25622 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25623 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25624 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25626 if (VECTOR_MODE_P (mode0))
25627 op0 = safe_vector_operand (op0, mode0);
25628 if (VECTOR_MODE_P (mode1))
25629 op1 = safe_vector_operand (op1, mode1);
25631 if (optimize || !target
25632 || GET_MODE (target) != tmode
25633 || !insn_data[icode].operand[0].predicate (target, tmode))
25634 target = gen_reg_rtx (tmode);
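/* Some insns want a full 128-bit (TImode) operand where the builtin
   only supplies a 32-bit value; load it into the low element of a
   V4SI register and reinterpret the whole register as TImode.  */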
25636 if (GET_MODE (op1) == SImode && mode1 == TImode)
25638 rtx x = gen_reg_rtx (V4SImode);
25639 emit_insn (gen_sse2_loadd (x, op1));
25640 op1 = gen_lowpart (TImode, x);
25643 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25644 op0 = copy_to_mode_reg (mode0, op0);
25645 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25646 op1 = copy_to_mode_reg (mode1, op1);
25648 pat = GEN_FCN (icode) (target, op0, op1);
25657 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25660 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25661 enum ix86_builtin_func_type m_type,
25662 enum rtx_code sub_code)
25667 bool comparison_p = false;
25669 bool last_arg_constant = false;
25670 int num_memory = 0;
25673 enum machine_mode mode;
25676 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25680 case MULTI_ARG_4_DF2_DI_I:
25681 case MULTI_ARG_4_DF2_DI_I1:
25682 case MULTI_ARG_4_SF2_SI_I:
25683 case MULTI_ARG_4_SF2_SI_I1:
25685 last_arg_constant = true;
25688 case MULTI_ARG_3_SF:
25689 case MULTI_ARG_3_DF:
25690 case MULTI_ARG_3_SF2:
25691 case MULTI_ARG_3_DF2:
25692 case MULTI_ARG_3_DI:
25693 case MULTI_ARG_3_SI:
25694 case MULTI_ARG_3_SI_DI:
25695 case MULTI_ARG_3_HI:
25696 case MULTI_ARG_3_HI_SI:
25697 case MULTI_ARG_3_QI:
25698 case MULTI_ARG_3_DI2:
25699 case MULTI_ARG_3_SI2:
25700 case MULTI_ARG_3_HI2:
25701 case MULTI_ARG_3_QI2:
25705 case MULTI_ARG_2_SF:
25706 case MULTI_ARG_2_DF:
25707 case MULTI_ARG_2_DI:
25708 case MULTI_ARG_2_SI:
25709 case MULTI_ARG_2_HI:
25710 case MULTI_ARG_2_QI:
25714 case MULTI_ARG_2_DI_IMM:
25715 case MULTI_ARG_2_SI_IMM:
25716 case MULTI_ARG_2_HI_IMM:
25717 case MULTI_ARG_2_QI_IMM:
25719 last_arg_constant = true;
25722 case MULTI_ARG_1_SF:
25723 case MULTI_ARG_1_DF:
25724 case MULTI_ARG_1_SF2:
25725 case MULTI_ARG_1_DF2:
25726 case MULTI_ARG_1_DI:
25727 case MULTI_ARG_1_SI:
25728 case MULTI_ARG_1_HI:
25729 case MULTI_ARG_1_QI:
25730 case MULTI_ARG_1_SI_DI:
25731 case MULTI_ARG_1_HI_DI:
25732 case MULTI_ARG_1_HI_SI:
25733 case MULTI_ARG_1_QI_DI:
25734 case MULTI_ARG_1_QI_SI:
25735 case MULTI_ARG_1_QI_HI:
25739 case MULTI_ARG_2_DI_CMP:
25740 case MULTI_ARG_2_SI_CMP:
25741 case MULTI_ARG_2_HI_CMP:
25742 case MULTI_ARG_2_QI_CMP:
25744 comparison_p = true;
25747 case MULTI_ARG_2_SF_TF:
25748 case MULTI_ARG_2_DF_TF:
25749 case MULTI_ARG_2_DI_TF:
25750 case MULTI_ARG_2_SI_TF:
25751 case MULTI_ARG_2_HI_TF:
25752 case MULTI_ARG_2_QI_TF:
25758 gcc_unreachable ();
25761 if (optimize || !target
25762 || GET_MODE (target) != tmode
25763 || !insn_data[icode].operand[0].predicate (target, tmode))
25764 target = gen_reg_rtx (tmode);
25766 gcc_assert (nargs <= 4);
25768 for (i = 0; i < nargs; i++)
25770 tree arg = CALL_EXPR_ARG (exp, i);
25771 rtx op = expand_normal (arg);
25772 int adjust = (comparison_p) ? 1 : 0;
25773 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25775 if (last_arg_constant && i == nargs - 1)
25777 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
25779 enum insn_code new_icode = icode;
25782 case CODE_FOR_xop_vpermil2v2df3:
25783 case CODE_FOR_xop_vpermil2v4sf3:
25784 case CODE_FOR_xop_vpermil2v4df3:
25785 case CODE_FOR_xop_vpermil2v8sf3:
25786 error ("the last argument must be a 2-bit immediate");
25787 return gen_reg_rtx (tmode);
25788 case CODE_FOR_xop_rotlv2di3:
25789 new_icode = CODE_FOR_rotlv2di3;
25791 case CODE_FOR_xop_rotlv4si3:
25792 new_icode = CODE_FOR_rotlv4si3;
25794 case CODE_FOR_xop_rotlv8hi3:
25795 new_icode = CODE_FOR_rotlv8hi3;
25797 case CODE_FOR_xop_rotlv16qi3:
25798 new_icode = CODE_FOR_rotlv16qi3;
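/* A constant rotate count is reduced modulo the element width, so it
   always satisfies the predicate; e.g. rotating V4SI elements by 35
   is masked below to 35 & 31 == 3 (illustrative).  */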
25800 if (CONST_INT_P (op))
25802 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
25803 op = GEN_INT (INTVAL (op) & mask);
25804 gcc_checking_assert
25805 (insn_data[icode].operand[i + 1].predicate (op, mode));
25809 gcc_checking_assert
25811 && insn_data[new_icode].operand[0].mode == tmode
25812 && insn_data[new_icode].operand[1].mode == tmode
25813 && insn_data[new_icode].operand[2].mode == mode
25814 && insn_data[new_icode].operand[0].predicate
25815 == insn_data[icode].operand[0].predicate
25816 && insn_data[new_icode].operand[1].predicate
25817 == insn_data[icode].operand[1].predicate);
25823 gcc_unreachable ();
25830 if (VECTOR_MODE_P (mode))
25831 op = safe_vector_operand (op, mode);
25833 /* If we aren't optimizing, only allow one memory operand to be generated.  */
25835 if (memory_operand (op, mode))
25838 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25841 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25843 op = force_reg (mode, op);
25847 args[i].mode = mode;
25853 pat = GEN_FCN (icode) (target, args[0].op);
25858 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25859 GEN_INT ((int)sub_code));
25860 else if (! comparison_p)
25861 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25864 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25868 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25873 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25877 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25881 gcc_unreachable ();
25891 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25892 insns with vec_merge. */
25895 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25899 tree arg0 = CALL_EXPR_ARG (exp, 0);
25900 rtx op1, op0 = expand_normal (arg0);
25901 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25902 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25904 if (optimize || !target
25905 || GET_MODE (target) != tmode
25906 || !insn_data[icode].operand[0].predicate (target, tmode))
25907 target = gen_reg_rtx (tmode);
25909 if (VECTOR_MODE_P (mode0))
25910 op0 = safe_vector_operand (op0, mode0);
25912 if ((optimize && !register_operand (op0, mode0))
25913 || !insn_data[icode].operand[1].predicate (op0, mode0))
25914 op0 = copy_to_mode_reg (mode0, op0);
25917 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25918 op1 = copy_to_mode_reg (mode0, op1);
25920 pat = GEN_FCN (icode) (target, op0, op1);
25927 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25930 ix86_expand_sse_compare (const struct builtin_description *d,
25931 tree exp, rtx target, bool swap)
25934 tree arg0 = CALL_EXPR_ARG (exp, 0);
25935 tree arg1 = CALL_EXPR_ARG (exp, 1);
25936 rtx op0 = expand_normal (arg0);
25937 rtx op1 = expand_normal (arg1);
25939 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25940 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25941 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25942 enum rtx_code comparison = d->comparison;
25944 if (VECTOR_MODE_P (mode0))
25945 op0 = safe_vector_operand (op0, mode0);
25946 if (VECTOR_MODE_P (mode1))
25947 op1 = safe_vector_operand (op1, mode1);
25949 /* Swap operands if we have a comparison that isn't available in hardware.  */
25953 rtx tmp = gen_reg_rtx (mode1);
25954 emit_move_insn (tmp, op1);
25959 if (optimize || !target
25960 || GET_MODE (target) != tmode
25961 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25962 target = gen_reg_rtx (tmode);
25964 if ((optimize && !register_operand (op0, mode0))
25965 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25966 op0 = copy_to_mode_reg (mode0, op0);
25967 if ((optimize && !register_operand (op1, mode1))
25968 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25969 op1 = copy_to_mode_reg (mode1, op1);
25971 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25972 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25979 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25982 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25986 tree arg0 = CALL_EXPR_ARG (exp, 0);
25987 tree arg1 = CALL_EXPR_ARG (exp, 1);
25988 rtx op0 = expand_normal (arg0);
25989 rtx op1 = expand_normal (arg1);
25990 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25991 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25992 enum rtx_code comparison = d->comparison;
25994 if (VECTOR_MODE_P (mode0))
25995 op0 = safe_vector_operand (op0, mode0);
25996 if (VECTOR_MODE_P (mode1))
25997 op1 = safe_vector_operand (op1, mode1);
25999 /* Swap operands if we have a comparison that isn't available in hardware.  */
26001 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26008 target = gen_reg_rtx (SImode);
26009 emit_move_insn (target, const0_rtx);
26010 target = gen_rtx_SUBREG (QImode, target, 0);
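/* TARGET is now the low byte (a QImode subreg) of an SImode pseudo
   that was zeroed above; storing the comparison result through the
   STRICT_LOW_PART set below leaves a zero-extended 0/1 in the full
   register, which SUBREG_REG (target) then returns.  */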
26012 if ((optimize && !register_operand (op0, mode0))
26013 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26014 op0 = copy_to_mode_reg (mode0, op0);
26015 if ((optimize && !register_operand (op1, mode1))
26016 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26017 op1 = copy_to_mode_reg (mode1, op1);
26019 pat = GEN_FCN (d->icode) (op0, op1);
26023 emit_insn (gen_rtx_SET (VOIDmode,
26024 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26025 gen_rtx_fmt_ee (comparison, QImode,
26029 return SUBREG_REG (target);
26032 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26035 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26039 tree arg0 = CALL_EXPR_ARG (exp, 0);
26040 rtx op1, op0 = expand_normal (arg0);
26041 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26042 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26044 if (optimize || target == 0
26045 || GET_MODE (target) != tmode
26046 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26047 target = gen_reg_rtx (tmode);
26049 if (VECTOR_MODE_P (mode0))
26050 op0 = safe_vector_operand (op0, mode0);
26052 if ((optimize && !register_operand (op0, mode0))
26053 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26054 op0 = copy_to_mode_reg (mode0, op0);
26056 op1 = GEN_INT (d->comparison);
26058 pat = GEN_FCN (d->icode) (target, op0, op1);
26065 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26068 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26072 tree arg0 = CALL_EXPR_ARG (exp, 0);
26073 tree arg1 = CALL_EXPR_ARG (exp, 1);
26074 rtx op0 = expand_normal (arg0);
26075 rtx op1 = expand_normal (arg1);
26076 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26077 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26078 enum rtx_code comparison = d->comparison;
26080 if (VECTOR_MODE_P (mode0))
26081 op0 = safe_vector_operand (op0, mode0);
26082 if (VECTOR_MODE_P (mode1))
26083 op1 = safe_vector_operand (op1, mode1);
26085 target = gen_reg_rtx (SImode);
26086 emit_move_insn (target, const0_rtx);
26087 target = gen_rtx_SUBREG (QImode, target, 0);
26089 if ((optimize && !register_operand (op0, mode0))
26090 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26091 op0 = copy_to_mode_reg (mode0, op0);
26092 if ((optimize && !register_operand (op1, mode1))
26093 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26094 op1 = copy_to_mode_reg (mode1, op1);
26096 pat = GEN_FCN (d->icode) (op0, op1);
26100 emit_insn (gen_rtx_SET (VOIDmode,
26101 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26102 gen_rtx_fmt_ee (comparison, QImode,
26106 return SUBREG_REG (target);
26109 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26112 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26113 tree exp, rtx target)
26116 tree arg0 = CALL_EXPR_ARG (exp, 0);
26117 tree arg1 = CALL_EXPR_ARG (exp, 1);
26118 tree arg2 = CALL_EXPR_ARG (exp, 2);
26119 tree arg3 = CALL_EXPR_ARG (exp, 3);
26120 tree arg4 = CALL_EXPR_ARG (exp, 4);
26121 rtx scratch0, scratch1;
26122 rtx op0 = expand_normal (arg0);
26123 rtx op1 = expand_normal (arg1);
26124 rtx op2 = expand_normal (arg2);
26125 rtx op3 = expand_normal (arg3);
26126 rtx op4 = expand_normal (arg4);
26127 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26129 tmode0 = insn_data[d->icode].operand[0].mode;
26130 tmode1 = insn_data[d->icode].operand[1].mode;
26131 modev2 = insn_data[d->icode].operand[2].mode;
26132 modei3 = insn_data[d->icode].operand[3].mode;
26133 modev4 = insn_data[d->icode].operand[4].mode;
26134 modei5 = insn_data[d->icode].operand[5].mode;
26135 modeimm = insn_data[d->icode].operand[6].mode;
26137 if (VECTOR_MODE_P (modev2))
26138 op0 = safe_vector_operand (op0, modev2);
26139 if (VECTOR_MODE_P (modev4))
26140 op2 = safe_vector_operand (op2, modev4);
26142 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26143 op0 = copy_to_mode_reg (modev2, op0);
26144 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26145 op1 = copy_to_mode_reg (modei3, op1);
26146 if ((optimize && !register_operand (op2, modev4))
26147 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26148 op2 = copy_to_mode_reg (modev4, op2);
26149 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26150 op3 = copy_to_mode_reg (modei5, op3);
26152 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26154 error ("the fifth argument must be an 8-bit immediate");
26158 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26160 if (optimize || !target
26161 || GET_MODE (target) != tmode0
26162 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26163 target = gen_reg_rtx (tmode0);
26165 scratch1 = gen_reg_rtx (tmode1);
26167 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26169 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26171 if (optimize || !target
26172 || GET_MODE (target) != tmode1
26173 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26174 target = gen_reg_rtx (tmode1);
26176 scratch0 = gen_reg_rtx (tmode0);
26178 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26182 gcc_assert (d->flag);
26184 scratch0 = gen_reg_rtx (tmode0);
26185 scratch1 = gen_reg_rtx (tmode1);
26187 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26197 target = gen_reg_rtx (SImode);
26198 emit_move_insn (target, const0_rtx);
26199 target = gen_rtx_SUBREG (QImode, target, 0);
26202 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26203 gen_rtx_fmt_ee (EQ, QImode,
26204 gen_rtx_REG ((enum machine_mode) d->flag,
26207 return SUBREG_REG (target);
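/* Illustrative sketch, not compiler code (assuming the builtin name
   registered from bdesc_pcmpestr, which smmintrin.h wraps as
   _mm_cmpestri):

       typedef char __v16qi __attribute__ ((vector_size (16)));
       int find (__v16qi needle, int ln, __v16qi hay, int lh)
       {
         return __builtin_ia32_pcmpestri128 (needle, ln, hay, lh, 0x0c);
       }

   where 0x0c selects unsigned-byte, equal-ordered (substring) mode and
   must be an 8-bit constant, matching the error check above.  */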
26214 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26217 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26218 tree exp, rtx target)
26221 tree arg0 = CALL_EXPR_ARG (exp, 0);
26222 tree arg1 = CALL_EXPR_ARG (exp, 1);
26223 tree arg2 = CALL_EXPR_ARG (exp, 2);
26224 rtx scratch0, scratch1;
26225 rtx op0 = expand_normal (arg0);
26226 rtx op1 = expand_normal (arg1);
26227 rtx op2 = expand_normal (arg2);
26228 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26230 tmode0 = insn_data[d->icode].operand[0].mode;
26231 tmode1 = insn_data[d->icode].operand[1].mode;
26232 modev2 = insn_data[d->icode].operand[2].mode;
26233 modev3 = insn_data[d->icode].operand[3].mode;
26234 modeimm = insn_data[d->icode].operand[4].mode;
26236 if (VECTOR_MODE_P (modev2))
26237 op0 = safe_vector_operand (op0, modev2);
26238 if (VECTOR_MODE_P (modev3))
26239 op1 = safe_vector_operand (op1, modev3);
26241 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26242 op0 = copy_to_mode_reg (modev2, op0);
26243 if ((optimize && !register_operand (op1, modev3))
26244 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26245 op1 = copy_to_mode_reg (modev3, op1);
26247 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26249 error ("the third argument must be an 8-bit immediate");
26253 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26255 if (optimize || !target
26256 || GET_MODE (target) != tmode0
26257 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26258 target = gen_reg_rtx (tmode0);
26260 scratch1 = gen_reg_rtx (tmode1);
26262 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26264 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26266 if (optimize || !target
26267 || GET_MODE (target) != tmode1
26268 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26269 target = gen_reg_rtx (tmode1);
26271 scratch0 = gen_reg_rtx (tmode0);
26273 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26277 gcc_assert (d->flag);
26279 scratch0 = gen_reg_rtx (tmode0);
26280 scratch1 = gen_reg_rtx (tmode1);
26282 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26292 target = gen_reg_rtx (SImode);
26293 emit_move_insn (target, const0_rtx);
26294 target = gen_rtx_SUBREG (QImode, target, 0);
26297 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26298 gen_rtx_fmt_ee (EQ, QImode,
26299 gen_rtx_REG ((enum machine_mode) d->flag,
26302 return SUBREG_REG (target);
26308 /* Subroutine of ix86_expand_builtin to take care of insns with
26309 a variable number of operands.  */
26312 ix86_expand_args_builtin (const struct builtin_description *d,
26313 tree exp, rtx target)
26315 rtx pat, real_target;
26316 unsigned int i, nargs;
26317 unsigned int nargs_constant = 0;
26318 int num_memory = 0;
26322 enum machine_mode mode;
26324 bool last_arg_count = false;
26325 enum insn_code icode = d->icode;
26326 const struct insn_data_d *insn_p = &insn_data[icode];
26327 enum machine_mode tmode = insn_p->operand[0].mode;
26328 enum machine_mode rmode = VOIDmode;
26330 enum rtx_code comparison = d->comparison;
26332 switch ((enum ix86_builtin_func_type) d->flag)
26334 case V2DF_FTYPE_V2DF_ROUND:
26335 case V4DF_FTYPE_V4DF_ROUND:
26336 case V4SF_FTYPE_V4SF_ROUND:
26337 case V8SF_FTYPE_V8SF_ROUND:
26338 return ix86_expand_sse_round (d, exp, target);
26339 case INT_FTYPE_V8SF_V8SF_PTEST:
26340 case INT_FTYPE_V4DI_V4DI_PTEST:
26341 case INT_FTYPE_V4DF_V4DF_PTEST:
26342 case INT_FTYPE_V4SF_V4SF_PTEST:
26343 case INT_FTYPE_V2DI_V2DI_PTEST:
26344 case INT_FTYPE_V2DF_V2DF_PTEST:
26345 return ix86_expand_sse_ptest (d, exp, target);
26346 case FLOAT128_FTYPE_FLOAT128:
26347 case FLOAT_FTYPE_FLOAT:
26348 case INT_FTYPE_INT:
26349 case UINT64_FTYPE_INT:
26350 case UINT16_FTYPE_UINT16:
26351 case INT64_FTYPE_INT64:
26352 case INT64_FTYPE_V4SF:
26353 case INT64_FTYPE_V2DF:
26354 case INT_FTYPE_V16QI:
26355 case INT_FTYPE_V8QI:
26356 case INT_FTYPE_V8SF:
26357 case INT_FTYPE_V4DF:
26358 case INT_FTYPE_V4SF:
26359 case INT_FTYPE_V2DF:
26360 case V16QI_FTYPE_V16QI:
26361 case V8SI_FTYPE_V8SF:
26362 case V8SI_FTYPE_V4SI:
26363 case V8HI_FTYPE_V8HI:
26364 case V8HI_FTYPE_V16QI:
26365 case V8QI_FTYPE_V8QI:
26366 case V8SF_FTYPE_V8SF:
26367 case V8SF_FTYPE_V8SI:
26368 case V8SF_FTYPE_V4SF:
26369 case V8SF_FTYPE_V8HI:
26370 case V4SI_FTYPE_V4SI:
26371 case V4SI_FTYPE_V16QI:
26372 case V4SI_FTYPE_V4SF:
26373 case V4SI_FTYPE_V8SI:
26374 case V4SI_FTYPE_V8HI:
26375 case V4SI_FTYPE_V4DF:
26376 case V4SI_FTYPE_V2DF:
26377 case V4HI_FTYPE_V4HI:
26378 case V4DF_FTYPE_V4DF:
26379 case V4DF_FTYPE_V4SI:
26380 case V4DF_FTYPE_V4SF:
26381 case V4DF_FTYPE_V2DF:
26382 case V4SF_FTYPE_V4SF:
26383 case V4SF_FTYPE_V4SI:
26384 case V4SF_FTYPE_V8SF:
26385 case V4SF_FTYPE_V4DF:
26386 case V4SF_FTYPE_V8HI:
26387 case V4SF_FTYPE_V2DF:
26388 case V2DI_FTYPE_V2DI:
26389 case V2DI_FTYPE_V16QI:
26390 case V2DI_FTYPE_V8HI:
26391 case V2DI_FTYPE_V4SI:
26392 case V2DF_FTYPE_V2DF:
26393 case V2DF_FTYPE_V4SI:
26394 case V2DF_FTYPE_V4DF:
26395 case V2DF_FTYPE_V4SF:
26396 case V2DF_FTYPE_V2SI:
26397 case V2SI_FTYPE_V2SI:
26398 case V2SI_FTYPE_V4SF:
26399 case V2SI_FTYPE_V2SF:
26400 case V2SI_FTYPE_V2DF:
26401 case V2SF_FTYPE_V2SF:
26402 case V2SF_FTYPE_V2SI:
26405 case V4SF_FTYPE_V4SF_VEC_MERGE:
26406 case V2DF_FTYPE_V2DF_VEC_MERGE:
26407 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26408 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26409 case V16QI_FTYPE_V16QI_V16QI:
26410 case V16QI_FTYPE_V8HI_V8HI:
26411 case V8QI_FTYPE_V8QI_V8QI:
26412 case V8QI_FTYPE_V4HI_V4HI:
26413 case V8HI_FTYPE_V8HI_V8HI:
26414 case V8HI_FTYPE_V16QI_V16QI:
26415 case V8HI_FTYPE_V4SI_V4SI:
26416 case V8SF_FTYPE_V8SF_V8SF:
26417 case V8SF_FTYPE_V8SF_V8SI:
26418 case V4SI_FTYPE_V4SI_V4SI:
26419 case V4SI_FTYPE_V8HI_V8HI:
26420 case V4SI_FTYPE_V4SF_V4SF:
26421 case V4SI_FTYPE_V2DF_V2DF:
26422 case V4HI_FTYPE_V4HI_V4HI:
26423 case V4HI_FTYPE_V8QI_V8QI:
26424 case V4HI_FTYPE_V2SI_V2SI:
26425 case V4DF_FTYPE_V4DF_V4DF:
26426 case V4DF_FTYPE_V4DF_V4DI:
26427 case V4SF_FTYPE_V4SF_V4SF:
26428 case V4SF_FTYPE_V4SF_V4SI:
26429 case V4SF_FTYPE_V4SF_V2SI:
26430 case V4SF_FTYPE_V4SF_V2DF:
26431 case V4SF_FTYPE_V4SF_DI:
26432 case V4SF_FTYPE_V4SF_SI:
26433 case V2DI_FTYPE_V2DI_V2DI:
26434 case V2DI_FTYPE_V16QI_V16QI:
26435 case V2DI_FTYPE_V4SI_V4SI:
26436 case V2DI_FTYPE_V2DI_V16QI:
26437 case V2DI_FTYPE_V2DF_V2DF:
26438 case V2SI_FTYPE_V2SI_V2SI:
26439 case V2SI_FTYPE_V4HI_V4HI:
26440 case V2SI_FTYPE_V2SF_V2SF:
26441 case V2DF_FTYPE_V2DF_V2DF:
26442 case V2DF_FTYPE_V2DF_V4SF:
26443 case V2DF_FTYPE_V2DF_V2DI:
26444 case V2DF_FTYPE_V2DF_DI:
26445 case V2DF_FTYPE_V2DF_SI:
26446 case V2SF_FTYPE_V2SF_V2SF:
26447 case V1DI_FTYPE_V1DI_V1DI:
26448 case V1DI_FTYPE_V8QI_V8QI:
26449 case V1DI_FTYPE_V2SI_V2SI:
26450 if (comparison == UNKNOWN)
26451 return ix86_expand_binop_builtin (icode, exp, target);
26454 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26455 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26456 gcc_assert (comparison != UNKNOWN);
26460 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26461 case V8HI_FTYPE_V8HI_SI_COUNT:
26462 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26463 case V4SI_FTYPE_V4SI_SI_COUNT:
26464 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26465 case V4HI_FTYPE_V4HI_SI_COUNT:
26466 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26467 case V2DI_FTYPE_V2DI_SI_COUNT:
26468 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26469 case V2SI_FTYPE_V2SI_SI_COUNT:
26470 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26471 case V1DI_FTYPE_V1DI_SI_COUNT:
26473 last_arg_count = true;
26475 case UINT64_FTYPE_UINT64_UINT64:
26476 case UINT_FTYPE_UINT_UINT:
26477 case UINT_FTYPE_UINT_USHORT:
26478 case UINT_FTYPE_UINT_UCHAR:
26479 case UINT16_FTYPE_UINT16_INT:
26480 case UINT8_FTYPE_UINT8_INT:
26483 case V2DI_FTYPE_V2DI_INT_CONVERT:
26486 nargs_constant = 1;
26488 case V8HI_FTYPE_V8HI_INT:
26489 case V8HI_FTYPE_V8SF_INT:
26490 case V8HI_FTYPE_V4SF_INT:
26491 case V8SF_FTYPE_V8SF_INT:
26492 case V4SI_FTYPE_V4SI_INT:
26493 case V4SI_FTYPE_V8SI_INT:
26494 case V4HI_FTYPE_V4HI_INT:
26495 case V4DF_FTYPE_V4DF_INT:
26496 case V4SF_FTYPE_V4SF_INT:
26497 case V4SF_FTYPE_V8SF_INT:
26498 case V2DI_FTYPE_V2DI_INT:
26499 case V2DF_FTYPE_V2DF_INT:
26500 case V2DF_FTYPE_V4DF_INT:
26502 nargs_constant = 1;
26504 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26505 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26506 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26507 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26508 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26511 case V16QI_FTYPE_V16QI_V16QI_INT:
26512 case V8HI_FTYPE_V8HI_V8HI_INT:
26513 case V8SI_FTYPE_V8SI_V8SI_INT:
26514 case V8SI_FTYPE_V8SI_V4SI_INT:
26515 case V8SF_FTYPE_V8SF_V8SF_INT:
26516 case V8SF_FTYPE_V8SF_V4SF_INT:
26517 case V4SI_FTYPE_V4SI_V4SI_INT:
26518 case V4DF_FTYPE_V4DF_V4DF_INT:
26519 case V4DF_FTYPE_V4DF_V2DF_INT:
26520 case V4SF_FTYPE_V4SF_V4SF_INT:
26521 case V2DI_FTYPE_V2DI_V2DI_INT:
26522 case V2DF_FTYPE_V2DF_V2DF_INT:
26524 nargs_constant = 1;
26526 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26529 nargs_constant = 1;
26531 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26534 nargs_constant = 1;
26536 case V2DI_FTYPE_V2DI_UINT_UINT:
26538 nargs_constant = 2;
26540 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26541 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26542 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26543 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26545 nargs_constant = 1;
26547 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26549 nargs_constant = 2;
26552 gcc_unreachable ();
26555 gcc_assert (nargs <= ARRAY_SIZE (args));
26557 if (comparison != UNKNOWN)
26559 gcc_assert (nargs == 2);
26560 return ix86_expand_sse_compare (d, exp, target, swap);
26563 if (rmode == VOIDmode || rmode == tmode)
26567 || GET_MODE (target) != tmode
26568 || !insn_p->operand[0].predicate (target, tmode))
26569 target = gen_reg_rtx (tmode);
26570 real_target = target;
26574 target = gen_reg_rtx (rmode);
26575 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26578 for (i = 0; i < nargs; i++)
26580 tree arg = CALL_EXPR_ARG (exp, i);
26581 rtx op = expand_normal (arg);
26582 enum machine_mode mode = insn_p->operand[i + 1].mode;
26583 bool match = insn_p->operand[i + 1].predicate (op, mode);
26585 if (last_arg_count && (i + 1) == nargs)
26587 /* SIMD shift insns take either an 8-bit immediate or a
26588 register as the count.  But builtin functions take an int as
26589 the count.  If the count doesn't match, we put it in a register.  */
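/* For example (assumption: the SSE2 shift builtin registered from
   bdesc_args as __builtin_ia32_psllwi128, whose count is declared as
   a plain int):

       typedef short __v8hi __attribute__ ((vector_size (16)));
       __v8hi f (__v8hi v, int n) { return __builtin_ia32_psllwi128 (v, n); }

   Here the count N fails the immediate predicate, so it is copied
   into a register below instead of the call being rejected.  */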
26592 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26593 if (!insn_p->operand[i + 1].predicate (op, mode))
26594 op = copy_to_reg (op);
26597 else if ((nargs - i) <= nargs_constant)
26602 case CODE_FOR_sse4_1_roundpd:
26603 case CODE_FOR_sse4_1_roundps:
26604 case CODE_FOR_sse4_1_roundsd:
26605 case CODE_FOR_sse4_1_roundss:
26606 case CODE_FOR_sse4_1_blendps:
26607 case CODE_FOR_avx_blendpd256:
26608 case CODE_FOR_avx_vpermilv4df:
26609 case CODE_FOR_avx_roundpd256:
26610 case CODE_FOR_avx_roundps256:
26611 error ("the last argument must be a 4-bit immediate");
26614 case CODE_FOR_sse4_1_blendpd:
26615 case CODE_FOR_avx_vpermilv2df:
26616 case CODE_FOR_xop_vpermil2v2df3:
26617 case CODE_FOR_xop_vpermil2v4sf3:
26618 case CODE_FOR_xop_vpermil2v4df3:
26619 case CODE_FOR_xop_vpermil2v8sf3:
26620 error ("the last argument must be a 2-bit immediate");
26623 case CODE_FOR_avx_vextractf128v4df:
26624 case CODE_FOR_avx_vextractf128v8sf:
26625 case CODE_FOR_avx_vextractf128v8si:
26626 case CODE_FOR_avx_vinsertf128v4df:
26627 case CODE_FOR_avx_vinsertf128v8sf:
26628 case CODE_FOR_avx_vinsertf128v8si:
26629 error ("the last argument must be a 1-bit immediate");
26632 case CODE_FOR_avx_vmcmpv2df3:
26633 case CODE_FOR_avx_vmcmpv4sf3:
26634 case CODE_FOR_avx_cmpv2df3:
26635 case CODE_FOR_avx_cmpv4sf3:
26636 case CODE_FOR_avx_cmpv4df3:
26637 case CODE_FOR_avx_cmpv8sf3:
26638 error ("the last argument must be a 5-bit immediate");
26642 switch (nargs_constant)
26645 if ((nargs - i) == nargs_constant)
26647 error ("the next to last argument must be an 8-bit immediate");
26651 error ("the last argument must be an 8-bit immediate");
26654 gcc_unreachable ();
26661 if (VECTOR_MODE_P (mode))
26662 op = safe_vector_operand (op, mode);
26664 /* If we aren't optimizing, only allow one memory operand to be generated.  */
26666 if (memory_operand (op, mode))
26669 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26671 if (optimize || !match || num_memory > 1)
26672 op = copy_to_mode_reg (mode, op);
26676 op = copy_to_reg (op);
26677 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26682 args[i].mode = mode;
26688 pat = GEN_FCN (icode) (real_target, args[0].op);
26691 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26694 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26698 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26699 args[2].op, args[3].op);
26702 gcc_unreachable ();
26712 /* Subroutine of ix86_expand_builtin to take care of special insns
26713 with a variable number of operands.  */
26716 ix86_expand_special_args_builtin (const struct builtin_description *d,
26717 tree exp, rtx target)
26721 unsigned int i, nargs, arg_adjust, memory;
26725 enum machine_mode mode;
26727 enum insn_code icode = d->icode;
26728 bool last_arg_constant = false;
26729 const struct insn_data_d *insn_p = &insn_data[icode];
26730 enum machine_mode tmode = insn_p->operand[0].mode;
26731 enum { load, store } klass;
26733 switch ((enum ix86_builtin_func_type) d->flag)
26735 case VOID_FTYPE_VOID:
26736 if (icode == CODE_FOR_avx_vzeroupper)
26737 target = GEN_INT (vzeroupper_intrinsic);
26738 emit_insn (GEN_FCN (icode) (target));
26740 case VOID_FTYPE_UINT64:
26741 case VOID_FTYPE_UNSIGNED:
26747 case UINT64_FTYPE_VOID:
26748 case UNSIGNED_FTYPE_VOID:
26753 case UINT64_FTYPE_PUNSIGNED:
26754 case V2DI_FTYPE_PV2DI:
26755 case V32QI_FTYPE_PCCHAR:
26756 case V16QI_FTYPE_PCCHAR:
26757 case V8SF_FTYPE_PCV4SF:
26758 case V8SF_FTYPE_PCFLOAT:
26759 case V4SF_FTYPE_PCFLOAT:
26760 case V4DF_FTYPE_PCV2DF:
26761 case V4DF_FTYPE_PCDOUBLE:
26762 case V2DF_FTYPE_PCDOUBLE:
26763 case VOID_FTYPE_PVOID:
26768 case VOID_FTYPE_PV2SF_V4SF:
26769 case VOID_FTYPE_PV4DI_V4DI:
26770 case VOID_FTYPE_PV2DI_V2DI:
26771 case VOID_FTYPE_PCHAR_V32QI:
26772 case VOID_FTYPE_PCHAR_V16QI:
26773 case VOID_FTYPE_PFLOAT_V8SF:
26774 case VOID_FTYPE_PFLOAT_V4SF:
26775 case VOID_FTYPE_PDOUBLE_V4DF:
26776 case VOID_FTYPE_PDOUBLE_V2DF:
26777 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26778 case VOID_FTYPE_PINT_INT:
26781 /* Reserve memory operand for target. */
26782 memory = ARRAY_SIZE (args);
26784 case V4SF_FTYPE_V4SF_PCV2SF:
26785 case V2DF_FTYPE_V2DF_PCDOUBLE:
26790 case V8SF_FTYPE_PCV8SF_V8SI:
26791 case V4DF_FTYPE_PCV4DF_V4DI:
26792 case V4SF_FTYPE_PCV4SF_V4SI:
26793 case V2DF_FTYPE_PCV2DF_V2DI:
26798 case VOID_FTYPE_PV8SF_V8SI_V8SF:
26799 case VOID_FTYPE_PV4DF_V4DI_V4DF:
26800 case VOID_FTYPE_PV4SF_V4SI_V4SF:
26801 case VOID_FTYPE_PV2DF_V2DI_V2DF:
26804 /* Reserve memory operand for target. */
26805 memory = ARRAY_SIZE (args);
26807 case VOID_FTYPE_UINT_UINT_UINT:
26808 case VOID_FTYPE_UINT64_UINT_UINT:
26809 case UCHAR_FTYPE_UINT_UINT_UINT:
26810 case UCHAR_FTYPE_UINT64_UINT_UINT:
26813 memory = ARRAY_SIZE (args);
26814 last_arg_constant = true;
26817 gcc_unreachable ();
26820 gcc_assert (nargs <= ARRAY_SIZE (args));
26822 if (klass == store)
26824 arg = CALL_EXPR_ARG (exp, 0);
26825 op = expand_normal (arg);
26826 gcc_assert (target == 0);
26829 if (GET_MODE (op) != Pmode)
26830 op = convert_to_mode (Pmode, op, 1);
26831 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
26834 target = force_reg (tmode, op);
26842 || GET_MODE (target) != tmode
26843 || !insn_p->operand[0].predicate (target, tmode))
26844 target = gen_reg_rtx (tmode);
26847 for (i = 0; i < nargs; i++)
26849 enum machine_mode mode = insn_p->operand[i + 1].mode;
26852 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26853 op = expand_normal (arg);
26854 match = insn_p->operand[i + 1].predicate (op, mode);
26856 if (last_arg_constant && (i + 1) == nargs)
26860 if (icode == CODE_FOR_lwp_lwpvalsi3
26861 || icode == CODE_FOR_lwp_lwpinssi3
26862 || icode == CODE_FOR_lwp_lwpvaldi3
26863 || icode == CODE_FOR_lwp_lwpinsdi3)
26864 error ("the last argument must be a 32-bit immediate");
26866 error ("the last argument must be an 8-bit immediate");
26874 /* This must be the memory operand. */
26875 if (GET_MODE (op) != Pmode)
26876 op = convert_to_mode (Pmode, op, 1);
26877 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
26878 gcc_assert (GET_MODE (op) == mode
26879 || GET_MODE (op) == VOIDmode);
26883 /* This must be a register.  */
26884 if (VECTOR_MODE_P (mode))
26885 op = safe_vector_operand (op, mode);
26887 gcc_assert (GET_MODE (op) == mode
26888 || GET_MODE (op) == VOIDmode);
26889 op = copy_to_mode_reg (mode, op);
26894 args[i].mode = mode;
26900 pat = GEN_FCN (icode) (target);
26903 pat = GEN_FCN (icode) (target, args[0].op);
26906 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26909 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26912 gcc_unreachable ();
26918 return klass == store ? 0 : target;
26921 /* Return the integer constant in ARG. Constrain it to be in the range
26922 of the subparts of VEC_TYPE; issue an error if not. */
26925 get_element_number (tree vec_type, tree arg)
26927 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26929 if (!host_integerp (arg, 1)
26930 || (elt = tree_low_cst (arg, 1), elt > max))
26932 error ("selector must be an integer constant in the range 0..%wi", max);
26939 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26940 ix86_expand_vector_init. We DO have language-level syntax for this, in
26941 the form of (type){ init-list }. Except that since we can't place emms
26942 instructions from inside the compiler, we can't allow the use of MMX
26943 registers unless the user explicitly asks for it. So we do *not* define
26944 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26945 we have builtins invoked by mmintrin.h that give us license to emit
26946 these sorts of instructions. */
26949 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26951 enum machine_mode tmode = TYPE_MODE (type);
26952 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26953 int i, n_elt = GET_MODE_NUNITS (tmode);
26954 rtvec v = rtvec_alloc (n_elt);
26956 gcc_assert (VECTOR_MODE_P (tmode));
26957 gcc_assert (call_expr_nargs (exp) == n_elt);
26959 for (i = 0; i < n_elt; ++i)
26961 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26962 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26965 if (!target || !register_operand (target, tmode))
26966 target = gen_reg_rtx (tmode);
26968 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
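/* Illustrative sketch, not compiler code: this is the path behind the
   MMX intrinsics; e.g. mmintrin.h's _mm_set_pi32 boils down to the
   builtin registered above:

       typedef int __v2si __attribute__ ((vector_size (8)));
       __v2si two (void) { return __builtin_ia32_vec_init_v2si (2, 1); }

   compiled with -mmmx.  Per the comment above, a plain
   (type){ init-list } would not give the compiler license to use MMX
   registers here.  */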
26972 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26973 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26974 had a language-level syntax for referencing vector elements. */
26977 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26979 enum machine_mode tmode, mode0;
26984 arg0 = CALL_EXPR_ARG (exp, 0);
26985 arg1 = CALL_EXPR_ARG (exp, 1);
26987 op0 = expand_normal (arg0);
26988 elt = get_element_number (TREE_TYPE (arg0), arg1);
26990 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26991 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26992 gcc_assert (VECTOR_MODE_P (mode0));
26994 op0 = force_reg (mode0, op0);
26996 if (optimize || !target || !register_operand (target, tmode))
26997 target = gen_reg_rtx (tmode);
26999 ix86_expand_vector_extract (true, target, op0, elt);
27004 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27005 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27006 a language-level syntax for referencing vector elements. */
27009 ix86_expand_vec_set_builtin (tree exp)
27011 enum machine_mode tmode, mode1;
27012 tree arg0, arg1, arg2;
27014 rtx op0, op1, target;
27016 arg0 = CALL_EXPR_ARG (exp, 0);
27017 arg1 = CALL_EXPR_ARG (exp, 1);
27018 arg2 = CALL_EXPR_ARG (exp, 2);
27020 tmode = TYPE_MODE (TREE_TYPE (arg0));
27021 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27022 gcc_assert (VECTOR_MODE_P (tmode));
27024 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27025 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27026 elt = get_element_number (TREE_TYPE (arg0), arg2);
27028 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27029 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27031 op0 = force_reg (tmode, op0);
27032 op1 = force_reg (mode1, op1);
27034 /* OP0 is the source of these builtin functions and shouldn't be
27035 modified.  Create a copy, use it, and return it as the target.  */
27036 target = gen_reg_rtx (tmode);
27037 emit_move_insn (target, op0);
27038 ix86_expand_vector_set (true, target, op1, elt);
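/* Illustrative sketch, not compiler code: because of the copy above,
   the source vector of a vec_set builtin is left untouched.  Compiled
   with -msse (the builtin is registered under SSE | 3DNow!A):

       typedef short __v4hi __attribute__ ((vector_size (8)));
       __v4hi with_lane1 (__v4hi v)
       {
         return __builtin_ia32_vec_set_v4hi (v, 99, 1);
       }  */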
27043 /* Expand an expression EXP that calls a built-in function,
27044 with result going to TARGET if that's convenient
27045 (and in mode MODE if that's convenient).
27046 SUBTARGET may be used as the target for computing one of EXP's operands.
27047 IGNORE is nonzero if the value is to be ignored. */
27050 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27051 enum machine_mode mode ATTRIBUTE_UNUSED,
27052 int ignore ATTRIBUTE_UNUSED)
27054 const struct builtin_description *d;
27056 enum insn_code icode;
27057 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27058 tree arg0, arg1, arg2;
27059 rtx op0, op1, op2, pat;
27060 enum machine_mode mode0, mode1, mode2;
27061 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27063 /* Determine whether the builtin function is available under the current ISA.
27064 Originally the builtin was not created if it wasn't applicable to the
27065 current ISA based on the command-line switches.  With function-specific
27066 options, we need to check in the context of the function making the call
27067 whether it is supported. */
27068 if (ix86_builtins_isa[fcode].isa
27069 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27071 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27072 NULL, (enum fpmath_unit) 0, false);
27075 error ("%qE needs unknown isa option", fndecl);
27078 gcc_assert (opts != NULL);
27079 error ("%qE needs isa option %s", fndecl, opts);
27087 case IX86_BUILTIN_MASKMOVQ:
27088 case IX86_BUILTIN_MASKMOVDQU:
27089 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27090 ? CODE_FOR_mmx_maskmovq
27091 : CODE_FOR_sse2_maskmovdqu);
27092 /* Note the arg order is different from the operand order. */
27093 arg1 = CALL_EXPR_ARG (exp, 0);
27094 arg2 = CALL_EXPR_ARG (exp, 1);
27095 arg0 = CALL_EXPR_ARG (exp, 2);
27096 op0 = expand_normal (arg0);
27097 op1 = expand_normal (arg1);
27098 op2 = expand_normal (arg2);
27099 mode0 = insn_data[icode].operand[0].mode;
27100 mode1 = insn_data[icode].operand[1].mode;
27101 mode2 = insn_data[icode].operand[2].mode;
27103 if (GET_MODE (op0) != Pmode)
27104 op0 = convert_to_mode (Pmode, op0, 1);
27105 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
27107 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27108 op0 = copy_to_mode_reg (mode0, op0);
27109 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27110 op1 = copy_to_mode_reg (mode1, op1);
27111 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27112 op2 = copy_to_mode_reg (mode2, op2);
27113 pat = GEN_FCN (icode) (op0, op1, op2);
27119 case IX86_BUILTIN_LDMXCSR:
27120 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27121 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27122 emit_move_insn (target, op0);
27123 emit_insn (gen_sse_ldmxcsr (target));
27126 case IX86_BUILTIN_STMXCSR:
27127 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27128 emit_insn (gen_sse_stmxcsr (target));
27129 return copy_to_mode_reg (SImode, target);
27131 case IX86_BUILTIN_CLFLUSH:
27132 arg0 = CALL_EXPR_ARG (exp, 0);
27133 op0 = expand_normal (arg0);
27134 icode = CODE_FOR_sse2_clflush;
27135 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27137 if (GET_MODE (op0) != Pmode)
27138 op0 = convert_to_mode (Pmode, op0, 1);
27139 op0 = force_reg (Pmode, op0);
27142 emit_insn (gen_sse2_clflush (op0));
27145 case IX86_BUILTIN_MONITOR:
27146 arg0 = CALL_EXPR_ARG (exp, 0);
27147 arg1 = CALL_EXPR_ARG (exp, 1);
27148 arg2 = CALL_EXPR_ARG (exp, 2);
27149 op0 = expand_normal (arg0);
27150 op1 = expand_normal (arg1);
27151 op2 = expand_normal (arg2);
27154 if (GET_MODE (op0) != Pmode)
27155 op0 = convert_to_mode (Pmode, op0, 1);
27156 op0 = force_reg (Pmode, op0);
27159 op1 = copy_to_mode_reg (SImode, op1);
27161 op2 = copy_to_mode_reg (SImode, op2);
27162 emit_insn (ix86_gen_monitor (op0, op1, op2));
27165 case IX86_BUILTIN_MWAIT:
27166 arg0 = CALL_EXPR_ARG (exp, 0);
27167 arg1 = CALL_EXPR_ARG (exp, 1);
27168 op0 = expand_normal (arg0);
27169 op1 = expand_normal (arg1);
27171 op0 = copy_to_mode_reg (SImode, op0);
27173 op1 = copy_to_mode_reg (SImode, op1);
27174 emit_insn (gen_sse3_mwait (op0, op1));
27177 case IX86_BUILTIN_VEC_INIT_V2SI:
27178 case IX86_BUILTIN_VEC_INIT_V4HI:
27179 case IX86_BUILTIN_VEC_INIT_V8QI:
27180 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27182 case IX86_BUILTIN_VEC_EXT_V2DF:
27183 case IX86_BUILTIN_VEC_EXT_V2DI:
27184 case IX86_BUILTIN_VEC_EXT_V4SF:
27185 case IX86_BUILTIN_VEC_EXT_V4SI:
27186 case IX86_BUILTIN_VEC_EXT_V8HI:
27187 case IX86_BUILTIN_VEC_EXT_V2SI:
27188 case IX86_BUILTIN_VEC_EXT_V4HI:
27189 case IX86_BUILTIN_VEC_EXT_V16QI:
27190 return ix86_expand_vec_ext_builtin (exp, target);
27192 case IX86_BUILTIN_VEC_SET_V2DI:
27193 case IX86_BUILTIN_VEC_SET_V4SF:
27194 case IX86_BUILTIN_VEC_SET_V4SI:
27195 case IX86_BUILTIN_VEC_SET_V8HI:
27196 case IX86_BUILTIN_VEC_SET_V4HI:
27197 case IX86_BUILTIN_VEC_SET_V16QI:
27198 return ix86_expand_vec_set_builtin (exp);
27200 case IX86_BUILTIN_VEC_PERM_V2DF:
27201 case IX86_BUILTIN_VEC_PERM_V4SF:
27202 case IX86_BUILTIN_VEC_PERM_V2DI:
27203 case IX86_BUILTIN_VEC_PERM_V4SI:
27204 case IX86_BUILTIN_VEC_PERM_V8HI:
27205 case IX86_BUILTIN_VEC_PERM_V16QI:
27206 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27207 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27208 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27209 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27210 case IX86_BUILTIN_VEC_PERM_V4DF:
27211 case IX86_BUILTIN_VEC_PERM_V8SF:
27212 return ix86_expand_vec_perm_builtin (exp);
27214 case IX86_BUILTIN_INFQ:
27215 case IX86_BUILTIN_HUGE_VALQ:
27217 REAL_VALUE_TYPE inf;
27218 rtx tmp;
27220 real_inf (&inf);
27221 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27223 tmp = validize_mem (force_const_mem (mode, tmp));
27225 if (target == 0)
27226 target = gen_reg_rtx (mode);
27228 emit_move_insn (target, tmp);
27230 return target;
27232 case IX86_BUILTIN_LLWPCB:
27233 arg0 = CALL_EXPR_ARG (exp, 0);
27234 op0 = expand_normal (arg0);
27235 icode = CODE_FOR_lwp_llwpcb;
27236 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27238 if (GET_MODE (op0) != Pmode)
27239 op0 = convert_to_mode (Pmode, op0, 1);
27240 op0 = force_reg (Pmode, op0);
27242 emit_insn (gen_lwp_llwpcb (op0));
27245 case IX86_BUILTIN_SLWPCB:
27246 icode = CODE_FOR_lwp_slwpcb;
27247 if (!target
27248 || !insn_data[icode].operand[0].predicate (target, Pmode))
27249 target = gen_reg_rtx (Pmode);
27250 emit_insn (gen_lwp_slwpcb (target));
27253 case IX86_BUILTIN_BEXTRI32:
27254 case IX86_BUILTIN_BEXTRI64:
27255 arg0 = CALL_EXPR_ARG (exp, 0);
27256 arg1 = CALL_EXPR_ARG (exp, 1);
27257 op0 = expand_normal (arg0);
27258 op1 = expand_normal (arg1);
27259 icode = (fcode == IX86_BUILTIN_BEXTRI32
27260 ? CODE_FOR_tbm_bextri_si
27261 : CODE_FOR_tbm_bextri_di);
27262 if (!CONST_INT_P (op1))
27264 error ("last argument must be an immediate");
27265 return const0_rtx;
27269 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27270 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27271 op1 = GEN_INT (length);
27272 op2 = GEN_INT (lsb_index);
27273 pat = GEN_FCN (icode) (target, op0, op1, op2);
27274 if (pat)
27275 emit_insn (pat);
27276 return target;
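/* Worked example (editorial): __builtin_ia32_bextri_u32 (x, 0x0408)
   splits the immediate into lsb_index == 0x08 and length == 0x04, so
   the generated bextr extracts bits [11:8], roughly (x >> 8) & 0xf.  */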
27279 case IX86_BUILTIN_RDRAND16_STEP:
27280 icode = CODE_FOR_rdrandhi_1;
27281 mode0 = HImode;
27282 goto rdrand_step;
27284 case IX86_BUILTIN_RDRAND32_STEP:
27285 icode = CODE_FOR_rdrandsi_1;
27286 mode0 = SImode;
27287 goto rdrand_step;
27289 case IX86_BUILTIN_RDRAND64_STEP:
27290 icode = CODE_FOR_rdranddi_1;
27291 mode0 = DImode;
27293 rdrand_step:
27294 op0 = gen_reg_rtx (mode0);
27295 emit_insn (GEN_FCN (icode) (op0));
27297 arg0 = CALL_EXPR_ARG (exp, 0);
27298 op1 = expand_normal (arg0);
27299 if (!address_operand (op1, VOIDmode))
27301 op1 = convert_memory_address (Pmode, op1);
27302 op1 = copy_addr_to_reg (op1);
27304 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27306 op1 = gen_reg_rtx (SImode);
27307 emit_move_insn (op1, CONST1_RTX (SImode));
27309 /* Emit SImode conditional move. */
27310 if (mode0 == HImode)
27312 op2 = gen_reg_rtx (SImode);
27313 emit_insn (gen_zero_extendhisi2 (op2, op0));
27315 else if (mode0 == SImode)
27316 op2 = op0;
27317 else
27318 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27321 target = gen_reg_rtx (SImode);
27323 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27324 const0_rtx);
27325 emit_insn (gen_rtx_SET (VOIDmode, target,
27326 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27327 return target;
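/* Net effect (editorial sketch): the value read by rdrand is stored
   through the pointer argument, and the builtin evaluates to 1 when
   CF was set (success); on failure rdrand leaves 0 in the destination,
   which is what the conditional move returns instead:

     unsigned int r;
     if (__builtin_ia32_rdrand32_step (&r))
       consume (r);   // consume() is a placeholder for user code
*/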
27333 for (i = 0, d = bdesc_special_args;
27334 i < ARRAY_SIZE (bdesc_special_args);
27335 i++, d++)
27336 if (d->code == fcode)
27337 return ix86_expand_special_args_builtin (d, exp, target);
27339 for (i = 0, d = bdesc_args;
27340 i < ARRAY_SIZE (bdesc_args);
27341 i++, d++)
27342 if (d->code == fcode)
27343 switch (fcode)
27345 case IX86_BUILTIN_FABSQ:
27346 case IX86_BUILTIN_COPYSIGNQ:
27347 if (!TARGET_SSE2)
27348 /* Emit a normal call if SSE2 isn't available. */
27349 return expand_call (exp, target, ignore);
27350 default:
27351 return ix86_expand_args_builtin (d, exp, target);
27354 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27355 if (d->code == fcode)
27356 return ix86_expand_sse_comi (d, exp, target);
27358 for (i = 0, d = bdesc_pcmpestr;
27359 i < ARRAY_SIZE (bdesc_pcmpestr);
27360 i++, d++)
27361 if (d->code == fcode)
27362 return ix86_expand_sse_pcmpestr (d, exp, target);
27364 for (i = 0, d = bdesc_pcmpistr;
27365 i < ARRAY_SIZE (bdesc_pcmpistr);
27366 i++, d++)
27367 if (d->code == fcode)
27368 return ix86_expand_sse_pcmpistr (d, exp, target);
27370 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27371 if (d->code == fcode)
27372 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27373 (enum ix86_builtin_func_type)
27374 d->flag, d->comparison);
27376 gcc_unreachable ();
27379 /* Returns a function decl for a vectorized version of the builtin function
27380 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27381 if it is not available. */
27384 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27385 tree type_in)
27387 enum machine_mode in_mode, out_mode;
27389 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27391 if (TREE_CODE (type_out) != VECTOR_TYPE
27392 || TREE_CODE (type_in) != VECTOR_TYPE
27393 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27396 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27397 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27398 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27399 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27403 case BUILT_IN_SQRT:
27404 if (out_mode == DFmode && in_mode == DFmode)
27406 if (out_n == 2 && in_n == 2)
27407 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27408 else if (out_n == 4 && in_n == 4)
27409 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
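/* Editorial example: vectorizing
     for (i = 0; i < n; i++) b[i] = sqrt (a[i]);
   over doubles with -mavx queries this hook with BUILT_IN_SQRT and
   V4DF vector types and receives IX86_BUILTIN_SQRTPD256 (vsqrtpd).  */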
27413 case BUILT_IN_SQRTF:
27414 if (out_mode == SFmode && in_mode == SFmode)
27416 if (out_n == 4 && in_n == 4)
27417 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27418 else if (out_n == 8 && in_n == 8)
27419 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27423 case BUILT_IN_LRINT:
27424 if (out_mode == SImode && out_n == 4
27425 && in_mode == DFmode && in_n == 2)
27426 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27429 case BUILT_IN_LRINTF:
27430 if (out_mode == SImode && in_mode == SFmode)
27432 if (out_n == 4 && in_n == 4)
27433 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27434 else if (out_n == 8 && in_n == 8)
27435 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27439 case BUILT_IN_COPYSIGN:
27440 if (out_mode == DFmode && in_mode == DFmode)
27442 if (out_n == 2 && in_n == 2)
27443 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27444 else if (out_n == 4 && in_n == 4)
27445 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27449 case BUILT_IN_COPYSIGNF:
27450 if (out_mode == SFmode && in_mode == SFmode)
27452 if (out_n == 4 && in_n == 4)
27453 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27454 else if (out_n == 8 && in_n == 8)
27455 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27459 case BUILT_IN_FLOOR:
27460 /* The round insn does not trap on denormals. */
27461 if (flag_trapping_math || !TARGET_ROUND)
27464 if (out_mode == DFmode && in_mode == DFmode)
27466 if (out_n == 2 && in_n == 2)
27467 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27468 else if (out_n == 4 && in_n == 4)
27469 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27473 case BUILT_IN_FLOORF:
27474 /* The round insn does not trap on denormals. */
27475 if (flag_trapping_math || !TARGET_ROUND)
27478 if (out_mode == SFmode && in_mode == SFmode)
27480 if (out_n == 4 && in_n == 4)
27481 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27482 else if (out_n == 8 && in_n == 8)
27483 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27487 case BUILT_IN_CEIL:
27488 /* The round insn does not trap on denormals. */
27489 if (flag_trapping_math || !TARGET_ROUND)
27492 if (out_mode == DFmode && in_mode == DFmode)
27494 if (out_n == 2 && in_n == 2)
27495 return ix86_builtins[IX86_BUILTIN_CEILPD];
27496 else if (out_n == 4 && in_n == 4)
27497 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27501 case BUILT_IN_CEILF:
27502 /* The round insn does not trap on denormals. */
27503 if (flag_trapping_math || !TARGET_ROUND)
27506 if (out_mode == SFmode && in_mode == SFmode)
27508 if (out_n == 4 && in_n == 4)
27509 return ix86_builtins[IX86_BUILTIN_CEILPS];
27510 else if (out_n == 8 && in_n == 8)
27511 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27515 case BUILT_IN_TRUNC:
27516 /* The round insn does not trap on denormals. */
27517 if (flag_trapping_math || !TARGET_ROUND)
27520 if (out_mode == DFmode && in_mode == DFmode)
27522 if (out_n == 2 && in_n == 2)
27523 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27524 else if (out_n == 4 && in_n == 4)
27525 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27529 case BUILT_IN_TRUNCF:
27530 /* The round insn does not trap on denormals. */
27531 if (flag_trapping_math || !TARGET_ROUND)
27534 if (out_mode == SFmode && in_mode == SFmode)
27536 if (out_n == 4 && in_n == 4)
27537 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27538 else if (out_n == 8 && in_n == 8)
27539 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27543 case BUILT_IN_RINT:
27544 /* The round insn does not trap on denormals. */
27545 if (flag_trapping_math || !TARGET_ROUND)
27548 if (out_mode == DFmode && in_mode == DFmode)
27550 if (out_n == 2 && in_n == 2)
27551 return ix86_builtins[IX86_BUILTIN_RINTPD];
27552 else if (out_n == 4 && in_n == 4)
27553 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27557 case BUILT_IN_RINTF:
27558 /* The round insn does not trap on denormals. */
27559 if (flag_trapping_math || !TARGET_ROUND)
27562 if (out_mode == SFmode && in_mode == SFmode)
27564 if (out_n == 4 && in_n == 4)
27565 return ix86_builtins[IX86_BUILTIN_RINTPS];
27566 else if (out_n == 8 && in_n == 8)
27567 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27572 if (out_mode == DFmode && in_mode == DFmode)
27574 if (out_n == 2 && in_n == 2)
27575 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27576 if (out_n == 4 && in_n == 4)
27577 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27581 case BUILT_IN_FMAF:
27582 if (out_mode == SFmode && in_mode == SFmode)
27584 if (out_n == 4 && in_n == 4)
27585 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27586 if (out_n == 8 && in_n == 8)
27587 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27595 /* Dispatch to a handler for a vectorization library. */
27596 if (ix86_veclib_handler)
27597 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27603 /* Handler for an SVML-style interface to
27604 a library with vectorized intrinsics. */
27607 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27610 tree fntype, new_fndecl, args;
27613 enum machine_mode el_mode, in_mode;
27616 /* The SVML is suitable for unsafe math only. */
27617 if (!flag_unsafe_math_optimizations)
27620 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27621 n = TYPE_VECTOR_SUBPARTS (type_out);
27622 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27623 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27624 if (el_mode != in_mode
27632 case BUILT_IN_LOG10:
27634 case BUILT_IN_TANH:
27636 case BUILT_IN_ATAN:
27637 case BUILT_IN_ATAN2:
27638 case BUILT_IN_ATANH:
27639 case BUILT_IN_CBRT:
27640 case BUILT_IN_SINH:
27642 case BUILT_IN_ASINH:
27643 case BUILT_IN_ASIN:
27644 case BUILT_IN_COSH:
27646 case BUILT_IN_ACOSH:
27647 case BUILT_IN_ACOS:
27648 if (el_mode != DFmode || n != 2)
27652 case BUILT_IN_EXPF:
27653 case BUILT_IN_LOGF:
27654 case BUILT_IN_LOG10F:
27655 case BUILT_IN_POWF:
27656 case BUILT_IN_TANHF:
27657 case BUILT_IN_TANF:
27658 case BUILT_IN_ATANF:
27659 case BUILT_IN_ATAN2F:
27660 case BUILT_IN_ATANHF:
27661 case BUILT_IN_CBRTF:
27662 case BUILT_IN_SINHF:
27663 case BUILT_IN_SINF:
27664 case BUILT_IN_ASINHF:
27665 case BUILT_IN_ASINF:
27666 case BUILT_IN_COSHF:
27667 case BUILT_IN_COSF:
27668 case BUILT_IN_ACOSHF:
27669 case BUILT_IN_ACOSF:
27670 if (el_mode != SFmode || n != 4)
27678 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27680 if (fn == BUILT_IN_LOGF)
27681 strcpy (name, "vmlsLn4");
27682 else if (fn == BUILT_IN_LOG)
27683 strcpy (name, "vmldLn2");
27684 else if (n == 4)
27686 sprintf (name, "vmls%s", bname+10);
27687 name[strlen (name)-1] = '4';
27689 else
27690 sprintf (name, "vmld%s2", bname+10);
27692 /* Convert to uppercase. */
27693 name[4] &= ~0x20;
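/* Worked example (editorial): for BUILT_IN_SINF, bname is
   "__builtin_sinf", so bname+10 is "sinf" and "vmls%s" gives
   "vmlssinf"; the trailing 'f' is overwritten with the width digit,
   yielding "vmlssin4", and clearing bit 0x20 of name[4] uppercases it
   to "vmlsSin4", the SVML routine for 4 packed floats.  */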
27695 arity = 0;
27696 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27697 args = TREE_CHAIN (args))
27698 arity++;
27700 if (arity == 1)
27701 fntype = build_function_type_list (type_out, type_in, NULL);
27702 else
27703 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27705 /* Build a function declaration for the vectorized function. */
27706 new_fndecl = build_decl (BUILTINS_LOCATION,
27707 FUNCTION_DECL, get_identifier (name), fntype);
27708 TREE_PUBLIC (new_fndecl) = 1;
27709 DECL_EXTERNAL (new_fndecl) = 1;
27710 DECL_IS_NOVOPS (new_fndecl) = 1;
27711 TREE_READONLY (new_fndecl) = 1;
27713 return new_fndecl;
27716 /* Handler for an ACML-style interface to
27717 a library with vectorized intrinsics. */
27720 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27722 char name[20] = "__vr.._";
27723 tree fntype, new_fndecl, args;
27726 enum machine_mode el_mode, in_mode;
27729 /* The ACML is 64-bit only and suitable only for unsafe math, as
27730 it does not correctly support parts of IEEE with the required
27731 precision such as denormals. */
27732 if (!TARGET_64BIT
27733 || !flag_unsafe_math_optimizations)
27734 return NULL_TREE;
27736 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27737 n = TYPE_VECTOR_SUBPARTS (type_out);
27738 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27739 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27740 if (el_mode != in_mode
27750 case BUILT_IN_LOG2:
27751 case BUILT_IN_LOG10:
27754 if (el_mode != DFmode
27759 case BUILT_IN_SINF:
27760 case BUILT_IN_COSF:
27761 case BUILT_IN_EXPF:
27762 case BUILT_IN_POWF:
27763 case BUILT_IN_LOGF:
27764 case BUILT_IN_LOG2F:
27765 case BUILT_IN_LOG10F:
27768 if (el_mode != SFmode
27777 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27778 sprintf (name + 7, "%s", bname+10);
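/* Editorial example: for BUILT_IN_SIN the (elided) case body sets
   name[4] = 'd' and name[5] = '2', and bname+10 is "sin", so the
   template "__vr.._" becomes "__vrd2_sin"; the SFmode entries come
   out as names like "__vrs4_sinf".  */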
27780 arity = 0;
27781 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27782 args = TREE_CHAIN (args))
27783 arity++;
27785 if (arity == 1)
27786 fntype = build_function_type_list (type_out, type_in, NULL);
27787 else
27788 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27790 /* Build a function declaration for the vectorized function. */
27791 new_fndecl = build_decl (BUILTINS_LOCATION,
27792 FUNCTION_DECL, get_identifier (name), fntype);
27793 TREE_PUBLIC (new_fndecl) = 1;
27794 DECL_EXTERNAL (new_fndecl) = 1;
27795 DECL_IS_NOVOPS (new_fndecl) = 1;
27796 TREE_READONLY (new_fndecl) = 1;
27798 return new_fndecl;
27802 /* Returns a decl of a function that implements conversion of an integer vector
27803 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27804 are the types involved when converting according to CODE.
27805 Return NULL_TREE if it is not available. */
27808 ix86_vectorize_builtin_conversion (unsigned int code,
27809 tree dest_type, tree src_type)
27817 switch (TYPE_MODE (src_type))
27820 switch (TYPE_MODE (dest_type))
27823 return (TYPE_UNSIGNED (src_type)
27824 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27825 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27827 return (TYPE_UNSIGNED (src_type)
27829 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27835 switch (TYPE_MODE (dest_type))
27838 return (TYPE_UNSIGNED (src_type)
27840 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27849 case FIX_TRUNC_EXPR:
27850 switch (TYPE_MODE (dest_type))
27853 switch (TYPE_MODE (src_type))
27856 return (TYPE_UNSIGNED (dest_type)
27858 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27860 return (TYPE_UNSIGNED (dest_type)
27862 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27869 switch (TYPE_MODE (src_type))
27872 return (TYPE_UNSIGNED (dest_type)
27874 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
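/* Editorial example: vectorizing v = (int) d over doubles, i.e.
   FIX_TRUNC_EXPR with a V4SImode destination and V4DFmode source,
   the nested switches above select IX86_BUILTIN_CVTTPD2DQ256
   (vcvttpd2dq).  */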
27891 /* Returns a decl of a target-specific builtin function that implements
27892 the reciprocal of the function, or NULL_TREE if not available. */
27895 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27896 bool sqrt ATTRIBUTE_UNUSED)
27898 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27899 && flag_finite_math_only && !flag_trapping_math
27900 && flag_unsafe_math_optimizations))
27901 return NULL_TREE;
27903 if (md_fn)
27904 /* Machine dependent builtins. */
27905 switch (fn)
27907 /* Vectorized version of sqrt to rsqrt conversion. */
27908 case IX86_BUILTIN_SQRTPS_NR:
27909 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27911 case IX86_BUILTIN_SQRTPS_NR256:
27912 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27917 else
27918 /* Normal builtins. */
27919 switch (fn)
27921 /* Sqrt to rsqrt conversion. */
27922 case BUILT_IN_SQRTF:
27923 return ix86_builtins[IX86_BUILTIN_RSQRTF];
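/* Editorial note: under the flags tested above (finite-only,
   non-trapping, unsafe math), this is what lets 1.0f / sqrtf (x)
   become rsqrtss plus one Newton-Raphson step rather than a full
   sqrtss/divss sequence.  */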
27930 /* Helper for avx_vpermilps256_operand et al. This is also used by
27931 the expansion functions to turn the parallel back into a mask.
27932 The return value is 0 for no match and the imm8+1 for a match. */
27935 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27937 unsigned i, nelt = GET_MODE_NUNITS (mode);
27939 unsigned char ipar[8];
27941 if (XVECLEN (par, 0) != (int) nelt)
27942 return 0;
27944 /* Validate that all of the elements are constants, and not totally
27945 out of range. Copy the data into an integral array to make the
27946 subsequent checks easier. */
27947 for (i = 0; i < nelt; ++i)
27949 rtx er = XVECEXP (par, 0, i);
27950 unsigned HOST_WIDE_INT ei;
27952 if (!CONST_INT_P (er))
27953 return 0;
27954 ei = INTVAL (er);
27955 if (ei >= nelt)
27956 return 0;
27957 ipar[i] = ei;
27960 switch (mode)
27962 case V4DFmode:
27963 /* In the 256-bit DFmode case, we can only move elements within
27964 a 128-bit lane. */
27965 for (i = 0; i < 2; ++i)
27967 if (ipar[i] >= 2)
27968 return 0;
27969 mask |= ipar[i] << i;
27971 for (i = 2; i < 4; ++i)
27973 if (ipar[i] < 2)
27974 return 0;
27975 mask |= (ipar[i] - 2) << i;
27977 break;
27979 case V8SFmode:
27980 /* In the 256-bit SFmode case, we have full freedom of movement
27981 within the low 128-bit lane, but the high 128-bit lane must
27982 mirror the exact same pattern. */
27983 for (i = 0; i < 4; ++i)
27984 if (ipar[i] + 4 != ipar[i + 4])
27985 return 0;
27986 nelt = 4;
27987 /* FALLTHRU */
27989 case V2DFmode:
27990 case V4SFmode:
27991 /* In the 128-bit case, we've full freedom in the placement of
27992 the elements from the source operand. */
27993 for (i = 0; i < nelt; ++i)
27994 mask |= ipar[i] << (i * (nelt / 2));
27995 break;
27997 default:
27998 gcc_unreachable ();
28001 /* Make sure success has a non-zero value by adding one. */
28002 return mask + 1;
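/* Worked example (editorial): for V4SFmode, (parallel [1 0 3 2])
   packs as 1 | (0 << 2) | (3 << 4) | (2 << 6) == 0xb1, so the
   function returns 0xb2 and the caller recovers imm8 0xb1 by
   subtracting one.  */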
28005 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28006 the expansion functions to turn the parallel back into a mask.
28007 The return value is 0 for no match and the imm8+1 for a match. */
28010 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28012 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28014 unsigned char ipar[8];
28016 if (XVECLEN (par, 0) != (int) nelt)
28017 return 0;
28019 /* Validate that all of the elements are constants, and not totally
28020 out of range. Copy the data into an integral array to make the
28021 subsequent checks easier. */
28022 for (i = 0; i < nelt; ++i)
28024 rtx er = XVECEXP (par, 0, i);
28025 unsigned HOST_WIDE_INT ei;
28027 if (!CONST_INT_P (er))
28028 return 0;
28029 ei = INTVAL (er);
28030 if (ei >= 2 * nelt)
28031 return 0;
28032 ipar[i] = ei;
28035 /* Validate that the halves of the permute are halves. */
28036 for (i = 0; i < nelt2 - 1; ++i)
28037 if (ipar[i] + 1 != ipar[i + 1])
28038 return 0;
28039 for (i = nelt2; i < nelt - 1; ++i)
28040 if (ipar[i] + 1 != ipar[i + 1])
28041 return 0;
28043 /* Reconstruct the mask. */
28044 for (i = 0; i < 2; ++i)
28046 unsigned e = ipar[i * nelt2];
28047 if (e % nelt2)
28048 return 0;
28049 e /= nelt2;
28050 mask |= e << (i * 4);
28053 /* Make sure success has a non-zero value by adding one. */
28054 return mask + 1;
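/* Worked example (editorial): for V4DFmode, the lane swap
   (parallel [2 3 0 1]) has half starts ipar[0] == 2 and ipar[2] == 0;
   dividing by nelt2 == 2 gives fields 1 and 0, i.e. imm8 == 0x01,
   and the function returns 0x02.  */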
28058 /* Store OPERAND to the memory after reload is completed. This means
28059 that we can't easily use assign_stack_local. */
28061 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28065 gcc_assert (reload_completed);
28066 if (ix86_using_red_zone ())
28068 result = gen_rtx_MEM (mode,
28069 gen_rtx_PLUS (Pmode,
28071 GEN_INT (-RED_ZONE_SIZE)));
28072 emit_move_insn (result, operand);
28074 else if (TARGET_64BIT)
28080 operand = gen_lowpart (DImode, operand);
28084 gen_rtx_SET (VOIDmode,
28085 gen_rtx_MEM (DImode,
28086 gen_rtx_PRE_DEC (DImode,
28087 stack_pointer_rtx)),
28091 gcc_unreachable ();
28093 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28102 split_double_mode (mode, &operand, 1, operands, operands + 1);
28104 gen_rtx_SET (VOIDmode,
28105 gen_rtx_MEM (SImode,
28106 gen_rtx_PRE_DEC (Pmode,
28107 stack_pointer_rtx)),
28110 gen_rtx_SET (VOIDmode,
28111 gen_rtx_MEM (SImode,
28112 gen_rtx_PRE_DEC (Pmode,
28113 stack_pointer_rtx)),
28118 /* Store HImodes as SImodes. */
28119 operand = gen_lowpart (SImode, operand);
28123 gen_rtx_SET (VOIDmode,
28124 gen_rtx_MEM (GET_MODE (operand),
28125 gen_rtx_PRE_DEC (SImode,
28126 stack_pointer_rtx)),
28130 gcc_unreachable ();
28132 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28137 /* Free operand from the memory. */
28139 ix86_free_from_memory (enum machine_mode mode)
28141 if (!ix86_using_red_zone ())
28143 int size;
28145 if (mode == DImode || TARGET_64BIT)
28146 size = 8;
28147 else
28148 size = 4;
28149 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28150 to a pop or add instruction if registers are available. */
28151 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28152 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28153 GEN_INT (size))));
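/* Editorial example: on -m32, freeing a DImode slot emits
   leal 8(%esp), %esp, which peephole2 may later rewrite as an
   addl $8, %esp or a pop into a dead register.  */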
28157 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28159 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28160 QImode must go into class Q_REGS.
28161 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28162 movdf to do mem-to-mem moves through integer regs. */
28165 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28167 enum machine_mode mode = GET_MODE (x);
28169 /* We're only allowed to return a subclass of CLASS. Many of the
28170 following checks fail for NO_REGS, so eliminate that early. */
28171 if (regclass == NO_REGS)
28174 /* All classes can load zeros. */
28175 if (x == CONST0_RTX (mode))
28178 /* Force constants into memory if we are loading a (nonzero) constant into
28179 an MMX or SSE register. This is because there are no MMX/SSE instructions
28180 to load from a constant. */
28181 if (CONSTANT_P (x)
28182 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28183 return NO_REGS;
28185 /* Prefer SSE regs only, if we can use them for math. */
28186 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28187 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28189 /* Floating-point constants need more complex checks. */
28190 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28192 /* General regs can load everything. */
28193 if (reg_class_subset_p (regclass, GENERAL_REGS))
28196 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28197 zero above. We only want to wind up preferring 80387 registers if
28198 we plan on doing computation with them. */
28199 if (TARGET_80387
28200 && standard_80387_constant_p (x) > 0)
28202 /* Limit class to non-sse. */
28203 if (regclass == FLOAT_SSE_REGS)
28204 return FLOAT_REGS;
28205 if (regclass == FP_TOP_SSE_REGS)
28206 return FP_TOP_REG;
28207 if (regclass == FP_SECOND_SSE_REGS)
28208 return FP_SECOND_REG;
28209 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28210 return regclass;
28216 /* Generally when we see PLUS here, it's the function invariant
28217 (plus soft-fp const_int), which can only be computed into general
28218 regs. */
28219 if (GET_CODE (x) == PLUS)
28220 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28222 /* QImode constants are easy to load, but non-constant QImode data
28223 must go into Q_REGS. */
28224 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28226 if (reg_class_subset_p (regclass, Q_REGS))
28227 return regclass;
28228 if (reg_class_subset_p (Q_REGS, regclass))
28229 return Q_REGS;
28230 return NO_REGS;
28236 /* Discourage putting floating-point values in SSE registers unless
28237 SSE math is being used, and likewise for the 387 registers. */
28239 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28241 enum machine_mode mode = GET_MODE (x);
28243 /* Restrict the output reload class to the register bank that we are doing
28244 math on. If we would like not to return a subset of CLASS, reject this
28245 alternative: if reload cannot do this, it will still use its choice. */
28247 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28248 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28250 if (X87_FLOAT_MODE_P (mode))
28252 if (regclass == FP_TOP_SSE_REGS)
28253 return FP_TOP_REG;
28254 else if (regclass == FP_SECOND_SSE_REGS)
28255 return FP_SECOND_REG;
28257 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28264 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28265 enum machine_mode mode, secondary_reload_info *sri)
28267 /* Double-word spills from general registers to non-offsettable memory
28268 references (zero-extended addresses) require special handling. */
28269 if (TARGET_64BIT
28270 && MEM_P (x)
28271 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
28272 && rclass == GENERAL_REGS
28273 && !offsettable_memref_p (x))
28275 sri->icode = (in_p
28276 ? CODE_FOR_reload_noff_load
28277 : CODE_FOR_reload_noff_store);
28278 /* Add the cost of moving address to a temporary. */
28279 sri->extra_cost = 1;
28284 /* QImode spills from non-QI registers require
28285 intermediate register on 32bit targets. */
28286 if (!TARGET_64BIT
28287 && !in_p && mode == QImode
28288 && (rclass == GENERAL_REGS
28289 || rclass == LEGACY_REGS
28290 || rclass == INDEX_REGS))
28299 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28300 regno = true_regnum (x);
28302 /* Return Q_REGS if the operand is in memory. */
28303 if (regno == -1)
28304 return Q_REGS;
28307 /* This condition handles corner case where an expression involving
28308 pointers gets vectorized. We're trying to use the address of a
28309 stack slot as a vector initializer.
28311 (set (reg:V2DI 74 [ vect_cst_.2 ])
28312 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28314 Eventually frame gets turned into sp+offset like this:
28316 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28317 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28318 (const_int 392 [0x188]))))
28320 That later gets turned into:
28322 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28323 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28324 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28326 We'll have the following reload recorded:
28328 Reload 0: reload_in (DI) =
28329 (plus:DI (reg/f:DI 7 sp)
28330 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28331 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28332 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28333 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28334 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28335 reload_reg_rtx: (reg:V2DI 22 xmm1)
28337 Which isn't going to work since SSE instructions can't handle scalar
28338 additions. Returning GENERAL_REGS forces the addition into integer
28339 register and reload can handle subsequent reloads without problems. */
28341 if (in_p && GET_CODE (x) == PLUS
28342 && SSE_CLASS_P (rclass)
28343 && SCALAR_INT_MODE_P (mode))
28344 return GENERAL_REGS;
28349 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28352 ix86_class_likely_spilled_p (reg_class_t rclass)
28363 case SSE_FIRST_REG:
28365 case FP_SECOND_REG:
28375 /* If we are copying between general and FP registers, we need a memory
28376 location. The same is true for SSE and MMX registers.
28378 To optimize register_move_cost performance, allow inline variant.
28380 The macro can't work reliably when one of the CLASSES is a class containing
28381 registers from multiple units (SSE, MMX, integer). We avoid this by never
28382 combining those units in a single alternative in the machine description.
28383 Ensure that this constraint holds to avoid unexpected surprises.
28385 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28386 enforce these sanity checks. */
28389 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28390 enum machine_mode mode, int strict)
28392 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28393 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28394 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28395 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28396 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28397 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28399 gcc_assert (!strict);
28400 return true;
28403 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28404 return true;
28406 /* ??? This is a lie. We do have moves between mmx/general, and for
28407 mmx/sse2. But by saying we need secondary memory we discourage the
28408 register allocator from using the mmx registers unless needed. */
28409 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28410 return true;
28412 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28414 /* SSE1 doesn't have any direct moves from other classes. */
28415 if (!TARGET_SSE2)
28416 return true;
28418 /* If the target says that inter-unit moves are more expensive
28419 than moving through memory, then don't generate them. */
28420 if (!TARGET_INTER_UNIT_MOVES)
28421 return true;
28423 /* Between SSE and general, we have moves no larger than word size. */
28424 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28425 return true;
28428 return false;
28432 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28433 enum machine_mode mode, int strict)
28435 return inline_secondary_memory_needed (class1, class2, mode, strict);
28438 /* Implement the TARGET_CLASS_MAX_NREGS hook.
28440 On the 80386, this is the size of MODE in words,
28441 except in the FP regs, where a single reg is always enough. */
28443 static unsigned char
28444 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
28446 if (MAYBE_INTEGER_CLASS_P (rclass))
28448 if (mode == XFmode)
28449 return (TARGET_64BIT ? 2 : 3);
28450 else if (mode == XCmode)
28451 return (TARGET_64BIT ? 4 : 6);
28453 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
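/* Editorial examples for the integer-class arithmetic above: on -m32,
   DImode needs (8 + 4 - 1) / 4 == 2 general registers, XFmode needs 3,
   and XCmode needs 6; on -m64 the same modes need 1, 2 and 4.  */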
28457 if (COMPLEX_MODE_P (mode))
28458 return 2;
28459 else
28460 return 1;
28464 /* Return true if the registers in CLASS cannot represent the change from
28465 modes FROM to TO. */
28468 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28469 enum reg_class regclass)
28474 /* x87 registers can't do subreg at all, as all values are reformatted
28475 to extended precision. */
28476 if (MAYBE_FLOAT_CLASS_P (regclass))
28477 return true;
28479 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28481 /* Vector registers do not support QI or HImode loads. If we don't
28482 disallow a change to these modes, reload will assume it's ok to
28483 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28484 the vec_dupv4hi pattern. */
28485 if (GET_MODE_SIZE (from) < 4)
28486 return true;
28488 /* Vector registers do not support subreg with nonzero offsets, which
28489 are otherwise valid for integer registers. Since we can't see
28490 whether we have a nonzero offset from here, prohibit all
28491 nonparadoxical subregs changing size. */
28492 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28493 return true;
28496 return false;
28499 /* Return the cost of moving data of mode M between a
28500 register and memory. A value of 2 is the default; this cost is
28501 relative to those in `REGISTER_MOVE_COST'.
28503 This function is used extensively by register_move_cost that is used to
28504 build tables at startup. Make it inline in this case.
28505 When IN is 2, return maximum of in and out move cost.
28507 If moving between registers and memory is more expensive than
28508 between two registers, you should define this macro to express the
28509 relative cost.
28511 Model also increased moving costs of QImode registers in non
28512 Q_REGS classes. */
28514 static int
28515 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28516 int in)
28519 if (FLOAT_CLASS_P (regclass))
28536 if (in == 2)
28537 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28538 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28540 if (SSE_CLASS_P (regclass))
28543 switch (GET_MODE_SIZE (mode))
28557 if (in == 2)
28558 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28559 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28561 if (MMX_CLASS_P (regclass))
28564 switch (GET_MODE_SIZE (mode))
28575 if (in == 2)
28576 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28577 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28579 switch (GET_MODE_SIZE (mode))
28581 case 1:
28582 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28584 if (!in)
28585 return ix86_cost->int_store[0];
28586 if (TARGET_PARTIAL_REG_DEPENDENCY
28587 && optimize_function_for_speed_p (cfun))
28588 cost = ix86_cost->movzbl_load;
28589 else
28590 cost = ix86_cost->int_load[0];
28591 if (in == 2)
28592 return MAX (cost, ix86_cost->int_store[0]);
28593 return cost;
28595 else
28597 if (in == 2)
28598 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28599 if (in)
28600 return ix86_cost->movzbl_load;
28601 else
28602 return ix86_cost->int_store[0] + 4;
28605 case 2:
28606 if (in == 2)
28607 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28608 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28609 default:
28610 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28611 if (mode == TFmode)
28612 mode = XFmode;
28613 if (in == 2)
28614 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28615 else if (in)
28616 cost = ix86_cost->int_load[2];
28617 else
28618 cost = ix86_cost->int_store[2];
28619 return (cost * (((int) GET_MODE_SIZE (mode)
28620 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28625 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28628 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28632 /* Return the cost of moving data from a register in class CLASS1 to
28633 one in class CLASS2.
28635 It is not required that the cost always equal 2 when FROM is the same as TO;
28636 on some machines it is expensive to move between registers if they are not
28637 general registers. */
28640 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28641 reg_class_t class2_i)
28643 enum reg_class class1 = (enum reg_class) class1_i;
28644 enum reg_class class2 = (enum reg_class) class2_i;
28646 /* In case we require secondary memory, compute cost of the store followed
28647 by load. In order to avoid bad register allocation choices, we need
28648 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28650 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28654 cost += inline_memory_move_cost (mode, class1, 2);
28655 cost += inline_memory_move_cost (mode, class2, 2);
28657 /* In case of copying from general_purpose_register we may emit multiple
28658 stores followed by single load causing memory size mismatch stall.
28659 Count this as arbitrarily high cost of 20. */
28660 if (targetm.class_max_nregs (class1, mode)
28661 > targetm.class_max_nregs (class2, mode))
28664 /* In the case of FP/MMX moves, the registers actually overlap, and we
28665 have to switch modes in order to treat them differently. */
28666 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28667 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28673 /* Moves between SSE/MMX and integer unit are expensive. */
28674 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28675 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28677 /* ??? By keeping returned value relatively high, we limit the number
28678 of moves between integer and MMX/SSE registers for all targets.
28679 Additionally, high value prevents problem with x86_modes_tieable_p(),
28680 where integer modes in MMX/SSE registers are not tieable
28681 because of missing QImode and HImode moves to, from or between
28682 MMX/SSE registers. */
28683 return MAX (8, ix86_cost->mmxsse_to_integer);
28685 if (MAYBE_FLOAT_CLASS_P (class1))
28686 return ix86_cost->fp_move;
28687 if (MAYBE_SSE_CLASS_P (class1))
28688 return ix86_cost->sse_move;
28689 if (MAYBE_MMX_CLASS_P (class1))
28690 return ix86_cost->mmx_move;
28694 /* Return TRUE if hard register REGNO can hold a value of machine-mode
28698 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28700 /* The flags registers can hold CCmode values only, and nothing else can. */
28701 if (CC_REGNO_P (regno))
28702 return GET_MODE_CLASS (mode) == MODE_CC;
28703 if (GET_MODE_CLASS (mode) == MODE_CC
28704 || GET_MODE_CLASS (mode) == MODE_RANDOM
28705 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28707 if (FP_REGNO_P (regno))
28708 return VALID_FP_MODE_P (mode);
28709 if (SSE_REGNO_P (regno))
28711 /* We implement the move patterns for all vector modes into and
28712 out of SSE registers, even when no operation instructions
28713 are available. OImode move is available only when AVX is
28714 enabled. */
28715 return ((TARGET_AVX && mode == OImode)
28716 || VALID_AVX256_REG_MODE (mode)
28717 || VALID_SSE_REG_MODE (mode)
28718 || VALID_SSE2_REG_MODE (mode)
28719 || VALID_MMX_REG_MODE (mode)
28720 || VALID_MMX_REG_MODE_3DNOW (mode));
28722 if (MMX_REGNO_P (regno))
28724 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28725 so if the register is available at all, then we can move data of
28726 the given mode into or out of it. */
28727 return (VALID_MMX_REG_MODE (mode)
28728 || VALID_MMX_REG_MODE_3DNOW (mode));
28731 if (mode == QImode)
28733 /* Take care for QImode values - they can be in non-QI regs,
28734 but then they do cause partial register stalls. */
28735 if (regno <= BX_REG || TARGET_64BIT)
28736 return true;
28737 if (!TARGET_PARTIAL_REG_STALL)
28738 return true;
28739 return !can_create_pseudo_p ();
28741 /* We handle both integer and float values in the general purpose registers. */
28742 else if (VALID_INT_MODE_P (mode))
28743 return true;
28744 else if (VALID_FP_MODE_P (mode))
28745 return true;
28746 else if (VALID_DFP_MODE_P (mode))
28747 return true;
28748 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28749 on to use that value in smaller contexts, this can easily force a
28750 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28751 supporting DImode, allow it. */
28752 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28753 return true;
28754 return false;
28758 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28759 tieable integer mode. */
28762 ix86_tieable_integer_mode_p (enum machine_mode mode)
28770 case QImode:
28771 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28773 case DImode:
28774 return TARGET_64BIT;
28781 /* Return true if MODE1 is accessible in a register that can hold MODE2
28782 without copying. That is, all register classes that can hold MODE2
28783 can also hold MODE1. */
28786 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28788 if (mode1 == mode2)
28789 return true;
28791 if (ix86_tieable_integer_mode_p (mode1)
28792 && ix86_tieable_integer_mode_p (mode2))
28793 return true;
28795 /* MODE2 being XFmode implies fp stack or general regs, which means we
28796 can tie any smaller floating point modes to it. Note that we do not
28797 tie this with TFmode. */
28798 if (mode2 == XFmode)
28799 return mode1 == SFmode || mode1 == DFmode;
28801 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28802 that we can tie it with SFmode. */
28803 if (mode2 == DFmode)
28804 return mode1 == SFmode;
28806 /* If MODE2 is only appropriate for an SSE register, then tie with
28807 any other mode acceptable to SSE registers. */
28808 if (GET_MODE_SIZE (mode2) == 16
28809 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28810 return (GET_MODE_SIZE (mode1) == 16
28811 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28813 /* If MODE2 is appropriate for an MMX register, then tie
28814 with any other mode acceptable to MMX registers. */
28815 if (GET_MODE_SIZE (mode2) == 8
28816 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28817 return (GET_MODE_SIZE (mode1) == 8
28818 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28823 /* Compute a (partial) cost for rtx X. Return true if the complete
28824 cost has been computed, and false if subexpressions should be
28825 scanned. In either case, *TOTAL contains the cost result. */
28828 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
28831 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28832 enum machine_mode mode = GET_MODE (x);
28833 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28841 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28842 *total = 3;
28843 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28844 *total = 2;
28845 else if (flag_pic && SYMBOLIC_CONST (x)
28846 && (!TARGET_64BIT
28847 || (GET_CODE (x) != LABEL_REF
28848 && (GET_CODE (x) != SYMBOL_REF
28849 || !SYMBOL_REF_LOCAL_P (x)))))
28850 *total = 1;
28851 else
28852 *total = 0;
28853 return true;
28856 if (mode == VOIDmode)
28857 *total = 0;
28858 else
28859 switch (standard_80387_constant_p (x))
28861 case 1: /* 0.0 */
28862 *total = 1;
28863 break;
28864 default: /* Other constants */
28865 *total = 2;
28866 break;
28869 /* Start with (MEM (SYMBOL_REF)), since that's where
28870 it'll probably end up. Add a penalty for size. */
28871 *total = (COSTS_N_INSNS (1)
28872 + (flag_pic != 0 && !TARGET_64BIT)
28873 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28879 /* The zero extension is often completely free on x86_64, so make
28880 it as cheap as possible. */
28881 if (TARGET_64BIT && mode == DImode
28882 && GET_MODE (XEXP (x, 0)) == SImode)
28883 *total = 1;
28884 else if (TARGET_ZERO_EXTEND_WITH_AND)
28885 *total = cost->add;
28886 else
28887 *total = cost->movzx;
28888 return false;
28890 case SIGN_EXTEND:
28891 *total = cost->movsx;
28895 if (CONST_INT_P (XEXP (x, 1))
28896 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28898 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28899 if (value == 1)
28901 *total = cost->add;
28902 return false;
28904 if ((value == 2 || value == 3)
28905 && cost->lea <= cost->shift_const)
28907 *total = cost->lea;
28908 return false;
28917 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28919 if (CONST_INT_P (XEXP (x, 1)))
28921 if (INTVAL (XEXP (x, 1)) > 32)
28922 *total = cost->shift_const + COSTS_N_INSNS (2);
28923 else
28924 *total = cost->shift_const * 2;
28926 else
28928 if (GET_CODE (XEXP (x, 1)) == AND)
28929 *total = cost->shift_var * 2;
28930 else
28931 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28936 if (CONST_INT_P (XEXP (x, 1)))
28937 *total = cost->shift_const;
28938 else
28939 *total = cost->shift_var;
28947 gcc_assert (FLOAT_MODE_P (mode));
28948 gcc_assert (TARGET_FMA || TARGET_FMA4);
28950 /* ??? SSE scalar/vector cost should be used here. */
28951 /* ??? Bald assumption that fma has the same cost as fmul. */
28952 *total = cost->fmul;
28953 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
28955 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28956 sub = XEXP (x, 0);
28957 if (GET_CODE (sub) == NEG)
28958 sub = XEXP (sub, 0);
28959 *total += rtx_cost (sub, FMA, 0, speed);
28961 sub = XEXP (x, 2);
28962 if (GET_CODE (sub) == NEG)
28963 sub = XEXP (sub, 0);
28964 *total += rtx_cost (sub, FMA, 2, speed);
28965 return true;
28969 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28971 /* ??? SSE scalar cost should be used here. */
28972 *total = cost->fmul;
28975 else if (X87_FLOAT_MODE_P (mode))
28977 *total = cost->fmul;
28980 else if (FLOAT_MODE_P (mode))
28982 /* ??? SSE vector cost should be used here. */
28983 *total = cost->fmul;
28988 rtx op0 = XEXP (x, 0);
28989 rtx op1 = XEXP (x, 1);
28991 if (CONST_INT_P (XEXP (x, 1)))
28993 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28994 for (nbits = 0; value != 0; value &= value - 1)
28995 nbits++;
28997 else
28998 /* This is arbitrary. */
28999 nbits = 7;
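/* The loop above is Kernighan's bit count: value &= value - 1 clears
   the lowest set bit per iteration.  Editorial example: a multiply by
   10 (binary 1010) gets nbits == 2, i.e. a cost of
   mult_init[MODE_INDEX (mode)] + 2 * mult_bit.  */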
29001 /* Compute costs correctly for widening multiplication. */
29002 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29003 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29004 == GET_MODE_SIZE (mode))
29006 int is_mulwiden = 0;
29007 enum machine_mode inner_mode = GET_MODE (op0);
29009 if (GET_CODE (op0) == GET_CODE (op1))
29010 is_mulwiden = 1, op1 = XEXP (op1, 0);
29011 else if (CONST_INT_P (op1))
29013 if (GET_CODE (op0) == SIGN_EXTEND)
29014 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29017 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29021 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29024 *total = (cost->mult_init[MODE_INDEX (mode)]
29025 + nbits * cost->mult_bit
29026 + rtx_cost (op0, outer_code, opno, speed)
29027 + rtx_cost (op1, outer_code, opno, speed));
29036 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29037 /* ??? SSE cost should be used here. */
29038 *total = cost->fdiv;
29039 else if (X87_FLOAT_MODE_P (mode))
29040 *total = cost->fdiv;
29041 else if (FLOAT_MODE_P (mode))
29042 /* ??? SSE vector cost should be used here. */
29043 *total = cost->fdiv;
29045 *total = cost->divide[MODE_INDEX (mode)];
29049 if (GET_MODE_CLASS (mode) == MODE_INT
29050 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29052 if (GET_CODE (XEXP (x, 0)) == PLUS
29053 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29054 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29055 && CONSTANT_P (XEXP (x, 1)))
29057 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29058 if (val == 2 || val == 4 || val == 8)
29060 *total = cost->lea;
29061 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
29062 outer_code, opno, speed);
29063 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29064 outer_code, opno, speed);
29065 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
29069 else if (GET_CODE (XEXP (x, 0)) == MULT
29070 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29072 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29073 if (val == 2 || val == 4 || val == 8)
29075 *total = cost->lea;
29076 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
29077 outer_code, opno, speed);
29078 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
29082 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29084 *total = cost->lea;
29085 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
29086 outer_code, opno, speed);
29087 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
29088 outer_code, opno, speed);
29089 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
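/* Editorial example: (plus (plus (mult reg1 (const_int 4)) reg2)
   (const_int 12)) matches the first arm above and is priced as a
   single LEA, e.g. leal 12(%edx,%eax,4), %ecx.  */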
29096 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29098 /* ??? SSE cost should be used here. */
29099 *total = cost->fadd;
29102 else if (X87_FLOAT_MODE_P (mode))
29104 *total = cost->fadd;
29107 else if (FLOAT_MODE_P (mode))
29109 /* ??? SSE vector cost should be used here. */
29110 *total = cost->fadd;
29118 if (!TARGET_64BIT && mode == DImode)
29120 *total = (cost->add * 2
29121 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
29122 << (GET_MODE (XEXP (x, 0)) != DImode))
29123 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
29124 << (GET_MODE (XEXP (x, 1)) != DImode)));
29130 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29132 /* ??? SSE cost should be used here. */
29133 *total = cost->fchs;
29136 else if (X87_FLOAT_MODE_P (mode))
29138 *total = cost->fchs;
29141 else if (FLOAT_MODE_P (mode))
29143 /* ??? SSE vector cost should be used here. */
29144 *total = cost->fchs;
29150 if (!TARGET_64BIT && mode == DImode)
29151 *total = cost->add * 2;
29152 else
29153 *total = cost->add;
29157 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29158 && XEXP (XEXP (x, 0), 1) == const1_rtx
29159 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29160 && XEXP (x, 1) == const0_rtx)
29162 /* This kind of construct is implemented using test[bwl].
29163 Treat it as if we had an AND. */
29164 *total = (cost->add
29165 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
29166 + rtx_cost (const1_rtx, outer_code, opno, speed));
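/* Editorial example: (compare (zero_extract x (const_int 1)
   (const_int 5)) (const_int 0)) is the RTL shape of testl $0x20, x,
   which is why it is priced like an AND.  */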
29172 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29177 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29178 /* ??? SSE cost should be used here. */
29179 *total = cost->fabs;
29180 else if (X87_FLOAT_MODE_P (mode))
29181 *total = cost->fabs;
29182 else if (FLOAT_MODE_P (mode))
29183 /* ??? SSE vector cost should be used here. */
29184 *total = cost->fabs;
29188 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29189 /* ??? SSE cost should be used here. */
29190 *total = cost->fsqrt;
29191 else if (X87_FLOAT_MODE_P (mode))
29192 *total = cost->fsqrt;
29193 else if (FLOAT_MODE_P (mode))
29194 /* ??? SSE vector cost should be used here. */
29195 *total = cost->fsqrt;
29199 if (XINT (x, 1) == UNSPEC_TP)
29200 *total = 0;
29201 return false;
29206 case VEC_DUPLICATE:
29207 /* ??? Assume all of these vector manipulation patterns are
29208 recognizable. In which case they all pretty much have the
29209 same cost. */
29210 *total = COSTS_N_INSNS (1);
29220 static int current_machopic_label_num;
29222 /* Given a symbol name and its associated stub, write out the
29223 definition of the stub. */
29226 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29228 unsigned int length;
29229 char *binder_name, *symbol_name, lazy_ptr_name[32];
29230 int label = ++current_machopic_label_num;
29232 /* For 64-bit we shouldn't get here. */
29233 gcc_assert (!TARGET_64BIT);
29235 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29236 symb = targetm.strip_name_encoding (symb);
29238 length = strlen (stub);
29239 binder_name = XALLOCAVEC (char, length + 32);
29240 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29242 length = strlen (symb);
29243 symbol_name = XALLOCAVEC (char, length + 32);
29244 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29246 sprintf (lazy_ptr_name, "L%d$lz", label);
29248 if (MACHOPIC_ATT_STUB)
29249 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29250 else if (MACHOPIC_PURE)
29251 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29253 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29255 fprintf (file, "%s:\n", stub);
29256 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29258 if (MACHOPIC_ATT_STUB)
29260 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29262 else if (MACHOPIC_PURE)
29265 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29266 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29267 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29268 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
29269 label, lazy_ptr_name, label);
29270 fprintf (file, "\tjmp\t*%%ecx\n");
29273 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29275 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29276 it needs no stub-binding-helper. */
29277 if (MACHOPIC_ATT_STUB)
29280 fprintf (file, "%s:\n", binder_name);
29282 if (MACHOPIC_PURE)
29284 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29285 fprintf (file, "\tpushl\t%%ecx\n");
29287 else
29288 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29290 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29292 /* N.B. Keep the correspondence of these
29293 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29294 old-pic/new-pic/non-pic stubs; altering this will break
29295 compatibility with existing dylibs. */
29296 if (MACHOPIC_PURE)
29298 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29299 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29301 else
29302 /* 16-byte -mdynamic-no-pic stub. */
29303 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
29305 fprintf (file, "%s:\n", lazy_ptr_name);
29306 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29307 fprintf (file, ASM_LONG "%s\n", binder_name);
29309 #endif /* TARGET_MACHO */
29311 /* Order the registers for register allocator. */
29314 x86_order_regs_for_local_alloc (void)
29319 /* First allocate the local general purpose registers. */
29320 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29321 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29322 reg_alloc_order [pos++] = i;
29324 /* Global general purpose registers. */
29325 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29326 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29327 reg_alloc_order [pos++] = i;
29329 /* x87 registers come first in case we are doing FP math
29330 using them. */
29331 if (!TARGET_SSE_MATH)
29332 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29333 reg_alloc_order [pos++] = i;
29335 /* SSE registers. */
29336 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29337 reg_alloc_order [pos++] = i;
29338 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29339 reg_alloc_order [pos++] = i;
29341 /* x87 registers. */
29342 if (TARGET_SSE_MATH)
29343 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29344 reg_alloc_order [pos++] = i;
29346 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29347 reg_alloc_order [pos++] = i;
29349 /* Initialize the rest of array as we do not allocate some registers
29350 at all. */
29351 while (pos < FIRST_PSEUDO_REGISTER)
29352 reg_alloc_order [pos++] = 0;
29355 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29356 in struct attribute_spec handler. */
29358 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29359 tree args,
29360 int flags ATTRIBUTE_UNUSED,
29361 bool *no_add_attrs)
29363 if (TREE_CODE (*node) != FUNCTION_TYPE
29364 && TREE_CODE (*node) != METHOD_TYPE
29365 && TREE_CODE (*node) != FIELD_DECL
29366 && TREE_CODE (*node) != TYPE_DECL)
29368 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29369 name);
29370 *no_add_attrs = true;
29371 return NULL_TREE;
29373 if (TARGET_64BIT)
29375 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29376 name);
29377 *no_add_attrs = true;
29378 return NULL_TREE;
29380 if (is_attribute_p ("callee_pop_aggregate_return", name))
29384 cst = TREE_VALUE (args);
29385 if (TREE_CODE (cst) != INTEGER_CST)
29387 warning (OPT_Wattributes,
29388 "%qE attribute requires an integer constant argument",
29389 name);
29390 *no_add_attrs = true;
29392 else if (compare_tree_int (cst, 0) != 0
29393 && compare_tree_int (cst, 1) != 0)
29395 warning (OPT_Wattributes,
29396 "argument to %qE attribute is neither zero, nor one",
29397 name);
29398 *no_add_attrs = true;
29407 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
29408 struct attribute_spec.handler. */
29410 ix86_handle_abi_attribute (tree *node, tree name,
29411 tree args ATTRIBUTE_UNUSED,
29412 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29414 if (TREE_CODE (*node) != FUNCTION_TYPE
29415 && TREE_CODE (*node) != METHOD_TYPE
29416 && TREE_CODE (*node) != FIELD_DECL
29417 && TREE_CODE (*node) != TYPE_DECL)
29419 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29420 name);
29421 *no_add_attrs = true;
29422 return NULL_TREE;
29425 /* Can combine regparm with all attributes but fastcall. */
29426 if (is_attribute_p ("ms_abi", name))
29428 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29430 error ("ms_abi and sysv_abi attributes are not compatible");
29435 else if (is_attribute_p ("sysv_abi", name))
29437 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29439 error ("ms_abi and sysv_abi attributes are not compatible");
29448 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29449 struct attribute_spec.handler. */
29451 ix86_handle_struct_attribute (tree *node, tree name,
29452 tree args ATTRIBUTE_UNUSED,
29453 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29456 if (DECL_P (*node))
29458 if (TREE_CODE (*node) == TYPE_DECL)
29459 type = &TREE_TYPE (*node);
29461 else
29462 type = node;
29464 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29465 || TREE_CODE (*type) == UNION_TYPE)))
29467 warning (OPT_Wattributes, "%qE attribute ignored",
29468 name);
29469 *no_add_attrs = true;
29472 else if ((is_attribute_p ("ms_struct", name)
29473 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29474 || ((is_attribute_p ("gcc_struct", name)
29475 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29477 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29478 name);
29479 *no_add_attrs = true;
29486 ix86_handle_fndecl_attribute (tree *node, tree name,
29487 tree args ATTRIBUTE_UNUSED,
29488 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29490 if (TREE_CODE (*node) != FUNCTION_DECL)
29492 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29493 name);
29494 *no_add_attrs = true;
29500 ix86_ms_bitfield_layout_p (const_tree record_type)
29502 return ((TARGET_MS_BITFIELD_LAYOUT
29503 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29504 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29507 /* Returns an expression indicating where the this parameter is
29508 located on entry to the FUNCTION. */
29511 x86_this_parameter (tree function)
29513 tree type = TREE_TYPE (function);
29514 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29519 const int *parm_regs;
29521 if (ix86_function_type_abi (type) == MS_ABI)
29522 parm_regs = x86_64_ms_abi_int_parameter_registers;
29524 parm_regs = x86_64_int_parameter_registers;
29525 return gen_rtx_REG (DImode, parm_regs[aggr]);
29528 nregs = ix86_function_regparm (type, function);
29530 if (nregs > 0 && !stdarg_p (type))
29533 unsigned int ccvt = ix86_get_callcvt (type);
29535 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29536 regno = aggr ? DX_REG : CX_REG;
29537 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29539 regno = CX_REG;
29540 if (aggr)
29541 return gen_rtx_MEM (SImode,
29542 plus_constant (stack_pointer_rtx, 4));
29544 else
29546 regno = AX_REG;
29547 if (aggr)
29549 regno = DX_REG;
29550 if (nregs == 1)
29551 return gen_rtx_MEM (SImode,
29552 plus_constant (stack_pointer_rtx, 4));
29555 return gen_rtx_REG (SImode, regno);
29558 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
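/* Editorial summary: on 64-bit, "this" arrives in the first (or, for
   aggregate returns, second) integer argument register; on 32-bit
   fastcall it is %ecx (%edx when the hidden return pointer takes
   %ecx), and in the default stack-based case it sits at 4(%esp),
   or 8(%esp) past a hidden return pointer.  */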
29561 /* Determine whether x86_output_mi_thunk can succeed. */
29564 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29565 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29566 HOST_WIDE_INT vcall_offset, const_tree function)
29568 /* 64-bit can handle anything. */
29569 if (TARGET_64BIT)
29570 return true;
29572 /* For 32-bit, everything's fine if we have one free register. */
29573 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29574 return true;
29576 /* Need a free register for vcall_offset. */
29577 if (vcall_offset)
29578 return false;
29580 /* Need a free register for GOT references. */
29581 if (flag_pic && !targetm.binds_local_p (function))
29582 return false;
29584 /* Otherwise ok. */
29585 return true;
29588 /* Output the assembler code for a thunk function. THUNK_DECL is the
29589 declaration for the thunk function itself, FUNCTION is the decl for
29590 the target function. DELTA is an immediate constant offset to be
29591 added to THIS. If VCALL_OFFSET is nonzero, the word at
29592 *(*this + vcall_offset) should be added to THIS. */
29595 x86_output_mi_thunk (FILE *file,
29596 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29597 HOST_WIDE_INT vcall_offset, tree function)
29599 rtx this_param = x86_this_parameter (function);
29600 rtx this_reg, tmp, fnaddr;
29602 emit_note (NOTE_INSN_PROLOGUE_END);
29604 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29605 pull it in now and let DELTA benefit. */
29606 if (REG_P (this_param))
29607 this_reg = this_param;
29608 else if (vcall_offset)
29610 /* Put the this parameter into %eax. */
29611 this_reg = gen_rtx_REG (Pmode, AX_REG);
29612 emit_move_insn (this_reg, this_param);
29615 this_reg = NULL_RTX;
29617 /* Adjust the this parameter by a fixed constant. */
29620 rtx delta_rtx = GEN_INT (delta);
29621 rtx delta_dst = this_reg ? this_reg : this_param;
29625 if (!x86_64_general_operand (delta_rtx, Pmode))
29627 tmp = gen_rtx_REG (Pmode, R10_REG);
29628 emit_move_insn (tmp, delta_rtx);
29633 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
29636 /* Adjust the this parameter by a value stored in the vtable. */
29639 rtx vcall_addr, vcall_mem, this_mem;
29640 unsigned int tmp_regno;
29643 tmp_regno = R10_REG;
29646 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29647 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29648 tmp_regno = AX_REG;
29650 tmp_regno = CX_REG;
29652 tmp = gen_rtx_REG (Pmode, tmp_regno);
29654 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
29655 if (Pmode != ptr_mode)
29656 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
29657 emit_move_insn (tmp, this_mem);
29659 /* Adjust the this parameter. */
29660 vcall_addr = plus_constant (tmp, vcall_offset);
29662 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
29664 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
29665 emit_move_insn (tmp2, GEN_INT (vcall_offset));
29666 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
29669 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
29670 if (Pmode != ptr_mode)
29671 emit_insn (gen_addsi_1_zext (this_reg,
29672 gen_rtx_REG (ptr_mode,
29676 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
29679 /* If necessary, drop THIS back to its stack slot. */
29680 if (this_reg && this_reg != this_param)
29681 emit_move_insn (this_param, this_reg);
29683 fnaddr = XEXP (DECL_RTL (function), 0);
29686 if (!flag_pic || targetm.binds_local_p (function)
29687 || cfun->machine->call_abi == MS_ABI)
29691 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
29692 tmp = gen_rtx_CONST (Pmode, tmp);
29693 fnaddr = gen_rtx_MEM (Pmode, tmp);
29698 if (!flag_pic || targetm.binds_local_p (function))
29701 else if (TARGET_MACHO)
29703 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
29704 fnaddr = XEXP (fnaddr, 0);
29706 #endif /* TARGET_MACHO */
29709 tmp = gen_rtx_REG (Pmode, CX_REG);
29710 output_set_got (tmp, NULL_RTX);
29712 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
29713 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
29714 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
29718 /* Our sibling call patterns do not allow memories, because we have no
29719 predicate that can distinguish between frame and non-frame memory.
29720 For our purposes here, we can get away with (ab)using a jump pattern,
29721 because we're going to do no optimization. */
29722 if (MEM_P (fnaddr))
29723 emit_jump_insn (gen_indirect_jump (fnaddr));
29726 tmp = gen_rtx_MEM (QImode, fnaddr);
29727 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
29728 tmp = emit_call_insn (tmp);
29729 SIBLING_CALL_P (tmp) = 1;
29733 /* Emit just enough of rest_of_compilation to get the insns emitted.
29734 Note that use_thunk calls assemble_start_function et al. */
29735 tmp = get_insns ();
29736 insn_locators_alloc ();
29737 shorten_branches (tmp);
29738 final_start_function (tmp, file, 1);
29739 final (tmp, file, 1);
29740 final_end_function ();
29744 x86_file_start (void)
29746 default_file_start ();
29748 darwin_file_start ();
29750 if (X86_FILE_START_VERSION_DIRECTIVE)
29751 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29752 if (X86_FILE_START_FLTUSED)
29753 fputs ("\t.global\t__fltused\n", asm_out_file);
29754 if (ix86_asm_dialect == ASM_INTEL)
29755 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29759 x86_field_alignment (tree field, int computed)
29761 enum machine_mode mode;
29762 tree type = TREE_TYPE (field);
29764 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29766 mode = TYPE_MODE (strip_array_types (type));
29767 if (mode == DFmode || mode == DCmode
29768 || GET_MODE_CLASS (mode) == MODE_INT
29769 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29770 return MIN (32, computed);
29774 /* Output assembler code to FILE to increment profiler label # LABELNO
29775 for profiling a function entry. */
29777 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29779 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29784 #ifndef NO_PROFILE_COUNTERS
29785 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29788 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29789 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29791 fprintf (file, "\tcall\t%s\n", mcount_name);
29795 #ifndef NO_PROFILE_COUNTERS
29796 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29799 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29803 #ifndef NO_PROFILE_COUNTERS
29804 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29807 fprintf (file, "\tcall\t%s\n", mcount_name);
29811 /* We don't have exact information about the insn sizes, but we may assume
29812 quite safely that we are informed about all 1 byte insns and memory
29813 address sizes.  This is enough to eliminate unnecessary padding in
 99% of cases.  */
29817 min_insn_size (rtx insn)
29821 if (!INSN_P (insn) || !active_insn_p (insn))
29824 /* Discard alignments we've emitted and jump instructions. */
29825 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29826 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29828 if (JUMP_TABLE_DATA_P (insn))
29831 /* Important case - calls are always 5 bytes.
29832 It is common to have many calls in a row. */
29834 && symbolic_reference_mentioned_p (PATTERN (insn))
29835 && !SIBLING_CALL_P (insn))
29837 len = get_attr_length (insn);
29841 /* For normal instructions we rely on get_attr_length being exact,
29842 with a few exceptions. */
29843 if (!JUMP_P (insn))
29845 enum attr_type type = get_attr_type (insn);
29850 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29851 || asm_noperands (PATTERN (insn)) >= 0)
29858 /* Otherwise trust get_attr_length. */
29862 l = get_attr_length_address (insn);
29863 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29872 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29874 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
29878 ix86_avoid_jump_mispredicts (void)
29880 rtx insn, start = get_insns ();
29881 int nbytes = 0, njumps = 0;
29884 /* Look for all minimal intervals of instructions containing 4 jumps.
29885 The intervals are bounded by START and INSN. NBYTES is the total
29886 size of instructions in the interval including INSN and not including
29887 START.  When NBYTES is smaller than 16 bytes, it is possible
29888 that the ends of START and INSN fall in the same 16-byte page.
29890 The smallest offset in the page at which INSN can start is the case
29891 where START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
29892 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).  */
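/* A worked example with illustrative numbers: with NBYTES = 12 and a
   2-byte INSN, maxskip is 15 - 12 + 2 = 5, so the emitted .p2align 4,,5
   moves INSN to the next 16-byte boundary exactly when at most 5 bytes
   of padding are needed, which is what keeps the fourth jump out of the
   window it could otherwise share with START.  */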
29894 for (insn = start; insn; insn = NEXT_INSN (insn))
29898 if (LABEL_P (insn))
29900 int align = label_to_alignment (insn);
29901 int max_skip = label_to_max_skip (insn);
29905 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29906 already in the current 16 byte page, because otherwise
29907 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29908 bytes to reach 16 byte boundary. */
29910 || (align <= 3 && max_skip != (1 << align) - 1))
29913 fprintf (dump_file, "Label %i with max_skip %i\n",
29914 INSN_UID (insn), max_skip);
29917 while (nbytes + max_skip >= 16)
29919 start = NEXT_INSN (start);
29920 if ((JUMP_P (start)
29921 && GET_CODE (PATTERN (start)) != ADDR_VEC
29922 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29924 njumps--, isjump = 1;
29927 nbytes -= min_insn_size (start);
29933 min_size = min_insn_size (insn);
29934 nbytes += min_size;
29936 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29937 INSN_UID (insn), min_size);
29939 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29940 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29948 start = NEXT_INSN (start);
29949 if ((JUMP_P (start)
29950 && GET_CODE (PATTERN (start)) != ADDR_VEC
29951 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29953 njumps--, isjump = 1;
29956 nbytes -= min_insn_size (start);
29958 gcc_assert (njumps >= 0);
29960 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29961 INSN_UID (start), INSN_UID (insn), nbytes);
29963 if (njumps == 3 && isjump && nbytes < 16)
29965 int padsize = 15 - nbytes + min_insn_size (insn);
29968 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29969 INSN_UID (insn), padsize);
29970 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29976 /* AMD Athlon works faster
29977 when RET is not the destination of a conditional jump or directly preceded
29978 by another jump instruction.  We avoid the penalty by inserting a NOP just
29979 before the RET instruction in such cases. */
29981 ix86_pad_returns (void)
29986 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29988 basic_block bb = e->src;
29989 rtx ret = BB_END (bb);
29991 bool replace = false;
29993 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29994 || optimize_bb_for_size_p (bb))
29996 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29997 if (active_insn_p (prev) || LABEL_P (prev))
29999 if (prev && LABEL_P (prev))
30004 FOR_EACH_EDGE (e, ei, bb->preds)
30005 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30006 && !(e->flags & EDGE_FALLTHRU))
30011 prev = prev_active_insn (ret);
30013 && ((JUMP_P (prev) && any_condjump_p (prev))
30016 /* Empty functions get a branch mispredict even when
30017 the jump destination is not visible to us. */
30018 if (!prev && !optimize_function_for_size_p (cfun))
30023 emit_jump_insn_before (gen_return_internal_long (), ret);
30029 /* Count the minimum number of instructions in BB. Return 4 if the
30030 number of instructions >= 4. */
30033 ix86_count_insn_bb (basic_block bb)
30036 int insn_count = 0;
30038 /* Count number of instructions in this block. Return 4 if the number
30039 of instructions >= 4. */
30040 FOR_BB_INSNS (bb, insn)
30042 /* Only happens in exit blocks. */
30044 && GET_CODE (PATTERN (insn)) == RETURN)
30047 if (NONDEBUG_INSN_P (insn)
30048 && GET_CODE (PATTERN (insn)) != USE
30049 && GET_CODE (PATTERN (insn)) != CLOBBER)
30052 if (insn_count >= 4)
30061 /* Count the minimum number of instructions in code path in BB.
30062 Return 4 if the number of instructions >= 4. */
30065 ix86_count_insn (basic_block bb)
30069 int min_prev_count;
30071 /* Only bother counting instructions along paths with no
30072 more than 2 basic blocks between entry and exit. Given
30073 that BB has an edge to exit, determine if a predecessor
30074 of BB has an edge from entry. If so, compute the number
30075 of instructions in the predecessor block. If there
30076 happen to be multiple such blocks, compute the minimum. */
30077 min_prev_count = 4;
30078 FOR_EACH_EDGE (e, ei, bb->preds)
30081 edge_iterator prev_ei;
30083 if (e->src == ENTRY_BLOCK_PTR)
30085 min_prev_count = 0;
30088 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30090 if (prev_e->src == ENTRY_BLOCK_PTR)
30092 int count = ix86_count_insn_bb (e->src);
30093 if (count < min_prev_count)
30094 min_prev_count = count;
30100 if (min_prev_count < 4)
30101 min_prev_count += ix86_count_insn_bb (bb);
30103 return min_prev_count;
30106 /* Pad short function to 4 instructions. */
30109 ix86_pad_short_function (void)
30114 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30116 rtx ret = BB_END (e->src);
30117 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30119 int insn_count = ix86_count_insn (e->src);
30121 /* Pad short function. */
30122 if (insn_count < 4)
30126 /* Find epilogue. */
30129 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30130 insn = PREV_INSN (insn);
30135 /* Two NOPs count as one instruction. */
30136 insn_count = 2 * (4 - insn_count);
30137 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30143 /* Implement machine specific optimizations. We implement padding of returns
30144 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
30148 /* We are freeing block_for_insn in the toplev to keep compatibility
30149 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30150 compute_bb_for_insn ();
30152 /* Run the vzeroupper optimization if needed. */
30153 if (TARGET_VZEROUPPER)
30154 move_or_delete_vzeroupper ();
30156 if (optimize && optimize_function_for_speed_p (cfun))
30158 if (TARGET_PAD_SHORT_FUNCTION)
30159 ix86_pad_short_function ();
30160 else if (TARGET_PAD_RETURNS)
30161 ix86_pad_returns ();
30162 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30163 if (TARGET_FOUR_JUMP_LIMIT)
30164 ix86_avoid_jump_mispredicts ();
30169 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
30172 x86_extended_QIreg_mentioned_p (rtx insn)
30175 extract_insn_cached (insn);
30176 for (i = 0; i < recog_data.n_operands; i++)
30177 if (REG_P (recog_data.operand[i])
30178 && REGNO (recog_data.operand[i]) > BX_REG)
30183 /* Return nonzero when P points to register encoded via REX prefix.
30184 Called via for_each_rtx. */
30186 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30188 unsigned int regno;
30191 regno = REGNO (*p);
30192 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30195 /* Return true when INSN mentions register that must be encoded using REX
30198 x86_extended_reg_mentioned_p (rtx insn)
30200 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30201 extended_reg_mentioned_1, NULL);
30204 /* If profitable, negate (without causing overflow) integer constant
30205 of mode MODE at location LOC. Return true in this case. */
30207 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30211 if (!CONST_INT_P (*loc))
30217 /* DImode x86_64 constants must fit in 32 bits. */
30218 gcc_assert (x86_64_immediate_operand (*loc, mode));
30229 gcc_unreachable ();
30232 /* Avoid overflows. */
30233 if (mode_signbit_p (mode, *loc))
30236 val = INTVAL (*loc);
30238 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30239 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
30240 if ((val < 0 && val != -128)
30243 *loc = GEN_INT (-val);
30250 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30251 optabs would emit if we didn't have TFmode patterns. */
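/* Equivalent C sketch of the expansion below, for a DImode input
   (illustrative only):

     if ((int64_t) in >= 0)
       out = (double) in;                     -- fits as signed: plain convert
     else
       {
         uint64_t i0 = (in >> 1) | (in & 1);  -- halve, keep low bit sticky
         out = (double) i0;
         out = out + out;                     -- undo the halving
       }  */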
30254 x86_emit_floatuns (rtx operands[2])
30256 rtx neglab, donelab, i0, i1, f0, in, out;
30257 enum machine_mode mode, inmode;
30259 inmode = GET_MODE (operands[1]);
30260 gcc_assert (inmode == SImode || inmode == DImode);
30263 in = force_reg (inmode, operands[1]);
30264 mode = GET_MODE (out);
30265 neglab = gen_label_rtx ();
30266 donelab = gen_label_rtx ();
30267 f0 = gen_reg_rtx (mode);
30269 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30271 expand_float (out, in, 0);
30273 emit_jump_insn (gen_jump (donelab));
30276 emit_label (neglab);
30278 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30280 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30282 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30284 expand_float (f0, i0, 0);
30286 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30288 emit_label (donelab);
30291 /* AVX does not support 32-byte integer vector operations,
30292 thus the longest vector we are faced with is V16QImode. */
30293 #define MAX_VECT_LEN 16
30295 struct expand_vec_perm_d
30297 rtx target, op0, op1;
30298 unsigned char perm[MAX_VECT_LEN];
30299 enum machine_mode vmode;
30300 unsigned char nelt;
30304 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30305 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30307 /* Get a vector mode of the same size as the original but with elements
30308 twice as wide. This is only guaranteed to apply to integral vectors. */
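/* For example, V8HImode (eight 16-bit elements) maps to V4SImode
   (four 32-bit elements): the same 16-byte size, elements twice as wide.  */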
30310 static inline enum machine_mode
30311 get_mode_wider_vector (enum machine_mode o)
30313 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30314 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30315 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30316 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30320 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30321 with all elements equal to VAR. Return true if successful. */
30324 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30325 rtx target, rtx val)
30348 /* First attempt to recognize VAL as-is. */
30349 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30350 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30351 if (recog_memoized (insn) < 0)
30354 /* If that fails, force VAL into a register. */
30357 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30358 seq = get_insns ();
30361 emit_insn_before (seq, insn);
30363 ok = recog_memoized (insn) >= 0;
30372 if (TARGET_SSE || TARGET_3DNOW_A)
30376 val = gen_lowpart (SImode, val);
30377 x = gen_rtx_TRUNCATE (HImode, val);
30378 x = gen_rtx_VEC_DUPLICATE (mode, x);
30379 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30392 struct expand_vec_perm_d dperm;
30396 memset (&dperm, 0, sizeof (dperm));
30397 dperm.target = target;
30398 dperm.vmode = mode;
30399 dperm.nelt = GET_MODE_NUNITS (mode);
30400 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30402 /* Extend to SImode using a paradoxical SUBREG. */
30403 tmp1 = gen_reg_rtx (SImode);
30404 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30406 /* Insert the SImode value as low element of a V4SImode vector. */
30407 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30408 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30410 ok = (expand_vec_perm_1 (&dperm)
30411 || expand_vec_perm_broadcast_1 (&dperm));
30423 /* Replicate the value once into the next wider mode and recurse. */
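/* E.g. for a V16QImode broadcast the QImode value is widened to
   (VAL << 8) | VAL in HImode and the problem becomes a V8HImode
   broadcast (an illustrative reading of the code below).  */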
30425 enum machine_mode smode, wsmode, wvmode;
30428 smode = GET_MODE_INNER (mode);
30429 wvmode = get_mode_wider_vector (mode);
30430 wsmode = GET_MODE_INNER (wvmode);
30432 val = convert_modes (wsmode, smode, val, true);
30433 x = expand_simple_binop (wsmode, ASHIFT, val,
30434 GEN_INT (GET_MODE_BITSIZE (smode)),
30435 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30436 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30438 x = gen_lowpart (wvmode, target);
30439 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30447 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30448 rtx x = gen_reg_rtx (hvmode);
30450 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30453 x = gen_rtx_VEC_CONCAT (mode, x, x);
30454 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30463 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30464 whose ONE_VAR element is VAR, and other elements are zero. Return true
30468 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30469 rtx target, rtx var, int one_var)
30471 enum machine_mode vsimode;
30474 bool use_vector_set = false;
30479 /* For SSE4.1, we normally use vector set. But if the second
30480 element is zero and inter-unit moves are OK, we use movq instead. */
30482 use_vector_set = (TARGET_64BIT
30484 && !(TARGET_INTER_UNIT_MOVES
30490 use_vector_set = TARGET_SSE4_1;
30493 use_vector_set = TARGET_SSE2;
30496 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30503 use_vector_set = TARGET_AVX;
30506 /* Use ix86_expand_vector_set in 64bit mode only. */
30507 use_vector_set = TARGET_AVX && TARGET_64BIT;
30513 if (use_vector_set)
30515 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30516 var = force_reg (GET_MODE_INNER (mode), var);
30517 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30533 var = force_reg (GET_MODE_INNER (mode), var);
30534 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30535 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30540 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30541 new_target = gen_reg_rtx (mode);
30543 new_target = target;
30544 var = force_reg (GET_MODE_INNER (mode), var);
30545 x = gen_rtx_VEC_DUPLICATE (mode, var);
30546 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30547 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30550 /* We need to shuffle the value to the correct position, so
30551 create a new pseudo to store the intermediate result. */
30553 /* With SSE2, we can use the integer shuffle insns. */
30554 if (mode != V4SFmode && TARGET_SSE2)
30556 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30558 GEN_INT (one_var == 1 ? 0 : 1),
30559 GEN_INT (one_var == 2 ? 0 : 1),
30560 GEN_INT (one_var == 3 ? 0 : 1)));
30561 if (target != new_target)
30562 emit_move_insn (target, new_target);
30566 /* Otherwise convert the intermediate result to V4SFmode and
30567 use the SSE1 shuffle instructions. */
30568 if (mode != V4SFmode)
30570 tmp = gen_reg_rtx (V4SFmode);
30571 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30576 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30578 GEN_INT (one_var == 1 ? 0 : 1),
30579 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30580 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30582 if (mode != V4SFmode)
30583 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30584 else if (tmp != target)
30585 emit_move_insn (target, tmp);
30587 else if (target != new_target)
30588 emit_move_insn (target, new_target);
30593 vsimode = V4SImode;
30599 vsimode = V2SImode;
30605 /* Zero extend the variable element to SImode and recurse. */
30606 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30608 x = gen_reg_rtx (vsimode);
30609 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30611 gcc_unreachable ();
30613 emit_move_insn (target, gen_lowpart (mode, x));
30621 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30622 consisting of the values in VALS. It is known that all elements
30623 except ONE_VAR are constants. Return true if successful. */
30626 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30627 rtx target, rtx vals, int one_var)
30629 rtx var = XVECEXP (vals, 0, one_var);
30630 enum machine_mode wmode;
30633 const_vec = copy_rtx (vals);
30634 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30635 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30643 /* For the two element vectors, it's just as easy to use
30644 the general case. */
30648 /* Use ix86_expand_vector_set in 64bit mode only. */
30671 /* There's no way to set one QImode entry easily. Combine
30672 the variable value with its adjacent constant value, and
30673 promote to an HImode set. */
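/* For instance, setting element 5 of a V8QImode vector pairs it with
   constant element 4, forms the HImode value (var << 8) | const, and
   performs the set at HImode index 5 >> 1 == 2 (an illustrative
   walk-through of the code below).  */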
30674 x = XVECEXP (vals, 0, one_var ^ 1);
30677 var = convert_modes (HImode, QImode, var, true);
30678 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30679 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30680 x = GEN_INT (INTVAL (x) & 0xff);
30684 var = convert_modes (HImode, QImode, var, true);
30685 x = gen_int_mode (INTVAL (x) << 8, HImode);
30687 if (x != const0_rtx)
30688 var = expand_simple_binop (HImode, IOR, var, x, var,
30689 1, OPTAB_LIB_WIDEN);
30691 x = gen_reg_rtx (wmode);
30692 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30693 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30695 emit_move_insn (target, gen_lowpart (mode, x));
30702 emit_move_insn (target, const_vec);
30703 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30707 /* A subroutine of ix86_expand_vector_init_general. Use vector
30708 concatenate to handle the most general case: all values variable,
30709 and none identical. */
30712 ix86_expand_vector_init_concat (enum machine_mode mode,
30713 rtx target, rtx *ops, int n)
30715 enum machine_mode cmode, hmode = VOIDmode;
30716 rtx first[8], second[4];
30756 gcc_unreachable ();
30759 if (!register_operand (ops[1], cmode))
30760 ops[1] = force_reg (cmode, ops[1]);
30761 if (!register_operand (ops[0], cmode))
30762 ops[0] = force_reg (cmode, ops[0]);
30763 emit_insn (gen_rtx_SET (VOIDmode, target,
30764 gen_rtx_VEC_CONCAT (mode, ops[0],
30784 gcc_unreachable ();
30800 gcc_unreachable ();
30805 /* FIXME: We process inputs backward to help RA. PR 36222. */
30808 for (; i > 0; i -= 2, j--)
30810 first[j] = gen_reg_rtx (cmode);
30811 v = gen_rtvec (2, ops[i - 1], ops[i]);
30812 ix86_expand_vector_init (false, first[j],
30813 gen_rtx_PARALLEL (cmode, v));
30819 gcc_assert (hmode != VOIDmode);
30820 for (i = j = 0; i < n; i += 2, j++)
30822 second[j] = gen_reg_rtx (hmode);
30823 ix86_expand_vector_init_concat (hmode, second [j],
30827 ix86_expand_vector_init_concat (mode, target, second, n);
30830 ix86_expand_vector_init_concat (mode, target, first, n);
30834 gcc_unreachable ();
30838 /* A subroutine of ix86_expand_vector_init_general. Use vector
30839 interleave to handle the most general case: all values variable,
30840 and none identical. */
30843 ix86_expand_vector_init_interleave (enum machine_mode mode,
30844 rtx target, rtx *ops, int n)
30846 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30849 rtx (*gen_load_even) (rtx, rtx, rtx);
30850 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30851 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30856 gen_load_even = gen_vec_setv8hi;
30857 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30858 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30859 inner_mode = HImode;
30860 first_imode = V4SImode;
30861 second_imode = V2DImode;
30862 third_imode = VOIDmode;
30865 gen_load_even = gen_vec_setv16qi;
30866 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30867 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30868 inner_mode = QImode;
30869 first_imode = V8HImode;
30870 second_imode = V4SImode;
30871 third_imode = V2DImode;
30874 gcc_unreachable ();
30877 for (i = 0; i < n; i++)
30879 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30880 op0 = gen_reg_rtx (SImode);
30881 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30883 /* Insert the SImode value as low element of V4SImode vector. */
30884 op1 = gen_reg_rtx (V4SImode);
30885 op0 = gen_rtx_VEC_MERGE (V4SImode,
30886 gen_rtx_VEC_DUPLICATE (V4SImode,
30888 CONST0_RTX (V4SImode),
30890 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30892 /* Cast the V4SImode vector back to a vector in the original mode. */
30893 op0 = gen_reg_rtx (mode);
30894 emit_move_insn (op0, gen_lowpart (mode, op1));
30896 /* Load even elements into the second position. */
30897 emit_insn (gen_load_even (op0,
30898 force_reg (inner_mode,
30902 /* Cast vector to FIRST_IMODE vector. */
30903 ops[i] = gen_reg_rtx (first_imode);
30904 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30907 /* Interleave low FIRST_IMODE vectors. */
30908 for (i = j = 0; i < n; i += 2, j++)
30910 op0 = gen_reg_rtx (first_imode);
30911 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30913 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30914 ops[j] = gen_reg_rtx (second_imode);
30915 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30918 /* Interleave low SECOND_IMODE vectors. */
30919 switch (second_imode)
30922 for (i = j = 0; i < n / 2; i += 2, j++)
30924 op0 = gen_reg_rtx (second_imode);
30925 emit_insn (gen_interleave_second_low (op0, ops[i],
30928 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30930 ops[j] = gen_reg_rtx (third_imode);
30931 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30933 second_imode = V2DImode;
30934 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30938 op0 = gen_reg_rtx (second_imode);
30939 emit_insn (gen_interleave_second_low (op0, ops[0],
30942 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
30944 emit_insn (gen_rtx_SET (VOIDmode, target,
30945 gen_lowpart (mode, op0)));
30949 gcc_unreachable ();
30953 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30954 all values variable, and none identical. */
30957 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30958 rtx target, rtx vals)
30960 rtx ops[32], op0, op1;
30961 enum machine_mode half_mode = VOIDmode;
30968 if (!mmx_ok && !TARGET_SSE)
30980 n = GET_MODE_NUNITS (mode);
30981 for (i = 0; i < n; i++)
30982 ops[i] = XVECEXP (vals, 0, i);
30983 ix86_expand_vector_init_concat (mode, target, ops, n);
30987 half_mode = V16QImode;
30991 half_mode = V8HImode;
30995 n = GET_MODE_NUNITS (mode);
30996 for (i = 0; i < n; i++)
30997 ops[i] = XVECEXP (vals, 0, i);
30998 op0 = gen_reg_rtx (half_mode);
30999 op1 = gen_reg_rtx (half_mode);
31000 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31002 ix86_expand_vector_init_interleave (half_mode, op1,
31003 &ops [n >> 1], n >> 2);
31004 emit_insn (gen_rtx_SET (VOIDmode, target,
31005 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31009 if (!TARGET_SSE4_1)
31017 /* Don't use ix86_expand_vector_init_interleave if we can't
31018 move from GPR to SSE register directly. */
31019 if (!TARGET_INTER_UNIT_MOVES)
31022 n = GET_MODE_NUNITS (mode);
31023 for (i = 0; i < n; i++)
31024 ops[i] = XVECEXP (vals, 0, i);
31025 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31033 gcc_unreachable ();
31037 int i, j, n_elts, n_words, n_elt_per_word;
31038 enum machine_mode inner_mode;
31039 rtx words[4], shift;
31041 inner_mode = GET_MODE_INNER (mode);
31042 n_elts = GET_MODE_NUNITS (mode);
31043 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31044 n_elt_per_word = n_elts / n_words;
31045 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
31047 for (i = 0; i < n_words; ++i)
31049 rtx word = NULL_RTX;
31051 for (j = 0; j < n_elt_per_word; ++j)
31053 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31054 elt = convert_modes (word_mode, inner_mode, elt, true);
31060 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31061 word, 1, OPTAB_LIB_WIDEN);
31062 word = expand_simple_binop (word_mode, IOR, word, elt,
31063 word, 1, OPTAB_LIB_WIDEN);
31071 emit_move_insn (target, gen_lowpart (mode, words[0]));
31072 else if (n_words == 2)
31074 rtx tmp = gen_reg_rtx (mode);
31075 emit_clobber (tmp);
31076 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31077 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31078 emit_move_insn (target, tmp);
31080 else if (n_words == 4)
31082 rtx tmp = gen_reg_rtx (V4SImode);
31083 gcc_assert (word_mode == SImode);
31084 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31085 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31086 emit_move_insn (target, gen_lowpart (mode, tmp));
31089 gcc_unreachable ();
31093 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31094 instructions unless MMX_OK is true. */
31097 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31099 enum machine_mode mode = GET_MODE (target);
31100 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31101 int n_elts = GET_MODE_NUNITS (mode);
31102 int n_var = 0, one_var = -1;
31103 bool all_same = true, all_const_zero = true;
31107 for (i = 0; i < n_elts; ++i)
31109 x = XVECEXP (vals, 0, i);
31110 if (!(CONST_INT_P (x)
31111 || GET_CODE (x) == CONST_DOUBLE
31112 || GET_CODE (x) == CONST_FIXED))
31113 n_var++, one_var = i;
31114 else if (x != CONST0_RTX (inner_mode))
31115 all_const_zero = false;
31116 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31120 /* Constants are best loaded from the constant pool. */
31123 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31127 /* If all values are identical, broadcast the value. */
31129 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31130 XVECEXP (vals, 0, 0)))
31133 /* Values where only one field is non-constant are best loaded from
31134 the pool and overwritten via move later. */
31138 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31139 XVECEXP (vals, 0, one_var),
31143 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31147 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31151 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31153 enum machine_mode mode = GET_MODE (target);
31154 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31155 enum machine_mode half_mode;
31156 bool use_vec_merge = false;
31158 static rtx (*gen_extract[6][2]) (rtx, rtx)
31160 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31161 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31162 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31163 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31164 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31165 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31167 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31169 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31170 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31171 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31172 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31173 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31174 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31184 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31185 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31187 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31189 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31190 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31196 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31200 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31201 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31203 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31205 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31206 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31213 /* For the two element vectors, we implement a VEC_CONCAT with
31214 the extraction of the other element. */
31216 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31217 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31220 op0 = val, op1 = tmp;
31222 op0 = tmp, op1 = val;
31224 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31225 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31230 use_vec_merge = TARGET_SSE4_1;
31237 use_vec_merge = true;
31241 /* tmp = target = A B C D */
31242 tmp = copy_to_reg (target);
31243 /* target = A A B B */
31244 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31245 /* target = X A B B */
31246 ix86_expand_vector_set (false, target, val, 0);
31247 /* target = A X C D */
31248 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31249 const1_rtx, const0_rtx,
31250 GEN_INT (2+4), GEN_INT (3+4)));
31254 /* tmp = target = A B C D */
31255 tmp = copy_to_reg (target);
31256 /* tmp = X B C D */
31257 ix86_expand_vector_set (false, tmp, val, 0);
31258 /* target = A B X D */
31259 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31260 const0_rtx, const1_rtx,
31261 GEN_INT (0+4), GEN_INT (3+4)));
31265 /* tmp = target = A B C D */
31266 tmp = copy_to_reg (target);
31267 /* tmp = X B C D */
31268 ix86_expand_vector_set (false, tmp, val, 0);
31269 /* target = A B X D */
31270 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31271 const0_rtx, const1_rtx,
31272 GEN_INT (2+4), GEN_INT (0+4)));
31276 gcc_unreachable ();
31281 use_vec_merge = TARGET_SSE4_1;
31285 /* Element 0 handled by vec_merge below. */
31288 use_vec_merge = true;
31294 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31295 store into element 0, then shuffle them back. */
31299 order[0] = GEN_INT (elt);
31300 order[1] = const1_rtx;
31301 order[2] = const2_rtx;
31302 order[3] = GEN_INT (3);
31303 order[elt] = const0_rtx;
31305 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31306 order[1], order[2], order[3]));
31308 ix86_expand_vector_set (false, target, val, 0);
31310 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31311 order[1], order[2], order[3]));
31315 /* For SSE1, we have to reuse the V4SF code. */
31316 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31317 gen_lowpart (SFmode, val), elt);
31322 use_vec_merge = TARGET_SSE2;
31325 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31329 use_vec_merge = TARGET_SSE4_1;
31336 half_mode = V16QImode;
31342 half_mode = V8HImode;
31348 half_mode = V4SImode;
31354 half_mode = V2DImode;
31360 half_mode = V4SFmode;
31366 half_mode = V2DFmode;
31372 /* Compute offset. */
31376 gcc_assert (i <= 1);
31378 /* Extract the half. */
31379 tmp = gen_reg_rtx (half_mode);
31380 emit_insn (gen_extract[j][i] (tmp, target));
31382 /* Put val in tmp at elt. */
31383 ix86_expand_vector_set (false, tmp, val, elt);
31386 emit_insn (gen_insert[j][i] (target, target, tmp));
31395 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31396 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31397 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31401 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31403 emit_move_insn (mem, target);
31405 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31406 emit_move_insn (tmp, val);
31408 emit_move_insn (target, mem);
31413 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31415 enum machine_mode mode = GET_MODE (vec);
31416 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31417 bool use_vec_extr = false;
31430 use_vec_extr = true;
31434 use_vec_extr = TARGET_SSE4_1;
31446 tmp = gen_reg_rtx (mode);
31447 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31448 GEN_INT (elt), GEN_INT (elt),
31449 GEN_INT (elt+4), GEN_INT (elt+4)));
31453 tmp = gen_reg_rtx (mode);
31454 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31458 gcc_unreachable ();
31461 use_vec_extr = true;
31466 use_vec_extr = TARGET_SSE4_1;
31480 tmp = gen_reg_rtx (mode);
31481 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31482 GEN_INT (elt), GEN_INT (elt),
31483 GEN_INT (elt), GEN_INT (elt)));
31487 tmp = gen_reg_rtx (mode);
31488 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31492 gcc_unreachable ();
31495 use_vec_extr = true;
31500 /* For SSE1, we have to reuse the V4SF code. */
31501 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31502 gen_lowpart (V4SFmode, vec), elt);
31508 use_vec_extr = TARGET_SSE2;
31511 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31515 use_vec_extr = TARGET_SSE4_1;
31519 /* ??? Could extract the appropriate HImode element and shift. */
31526 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31527 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31529 /* Let the rtl optimizers know about the zero extension performed. */
31530 if (inner_mode == QImode || inner_mode == HImode)
31532 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31533 target = gen_lowpart (SImode, target);
31536 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31540 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31542 emit_move_insn (mem, vec);
31544 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31545 emit_move_insn (target, tmp);
31549 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31550 pattern to reduce; DEST is the destination; IN is the input vector. */
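/* In scalar terms the sequence below computes, for IN = { a, b, c, d }
   (a sketch assuming the usual lane numbering):

     tmp1 = { c, d, c, d }                          -- movhlps
     tmp2 = fn (tmp1, in)                           -- four lanes to two
     tmp3 = { tmp2[1], tmp2[1], tmp2[1], tmp2[1] }  -- shufps
     dest = fn (tmp2, tmp3)                         -- result in lane 0  */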
31553 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31555 rtx tmp1, tmp2, tmp3;
31557 tmp1 = gen_reg_rtx (V4SFmode);
31558 tmp2 = gen_reg_rtx (V4SFmode);
31559 tmp3 = gen_reg_rtx (V4SFmode);
31561 emit_insn (gen_sse_movhlps (tmp1, in, in));
31562 emit_insn (fn (tmp2, tmp1, in));
31564 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31565 const1_rtx, const1_rtx,
31566 GEN_INT (1+4), GEN_INT (1+4)));
31567 emit_insn (fn (dest, tmp2, tmp3));
31570 /* Target hook for scalar_mode_supported_p. */
31572 ix86_scalar_mode_supported_p (enum machine_mode mode)
31574 if (DECIMAL_FLOAT_MODE_P (mode))
31575 return default_decimal_float_supported_p ();
31576 else if (mode == TFmode)
31579 return default_scalar_mode_supported_p (mode);
31582 /* Implements target hook vector_mode_supported_p. */
31584 ix86_vector_mode_supported_p (enum machine_mode mode)
31586 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31588 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31590 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31592 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31594 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31599 /* Target hook for c_mode_for_suffix. */
31600 static enum machine_mode
31601 ix86_c_mode_for_suffix (char suffix)
31611 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31613 We do this in the new i386 backend to maintain source compatibility
31614 with the old cc0-based compiler. */
31617 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31618 tree inputs ATTRIBUTE_UNUSED,
31621 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31623 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31628 /* Implements the target hook targetm.asm.encode_section_info. */
31630 static void ATTRIBUTE_UNUSED
31631 ix86_encode_section_info (tree decl, rtx rtl, int first)
31633 default_encode_section_info (decl, rtl, first);
31635 if (TREE_CODE (decl) == VAR_DECL
31636 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31637 && ix86_in_large_data_p (decl))
31638 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31641 /* Worker function for REVERSE_CONDITION. */
31644 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31646 return (mode != CCFPmode && mode != CCFPUmode
31647 ? reverse_condition (code)
31648 : reverse_condition_maybe_unordered (code));
31651 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
31655 output_387_reg_move (rtx insn, rtx *operands)
31657 if (REG_P (operands[0]))
31659 if (REG_P (operands[1])
31660 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31662 if (REGNO (operands[0]) == FIRST_STACK_REG)
31663 return output_387_ffreep (operands, 0);
31664 return "fstp\t%y0";
31666 if (STACK_TOP_P (operands[0]))
31667 return "fld%Z1\t%y1";
31670 else if (MEM_P (operands[0]))
31672 gcc_assert (REG_P (operands[1]));
31673 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31674 return "fstp%Z0\t%y0";
31677 /* There is no non-popping store to memory for XFmode.
31678 So if we need one, follow the store with a load. */
31679 if (GET_MODE (operands[0]) == XFmode)
31680 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31682 return "fst%Z0\t%y0";
31689 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31690 FP status register is set. */
31693 ix86_emit_fp_unordered_jump (rtx label)
31695 rtx reg = gen_reg_rtx (HImode);
31698 emit_insn (gen_x86_fnstsw_1 (reg));
31700 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31702 emit_insn (gen_x86_sahf_1 (reg));
31704 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31705 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31709 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31711 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31712 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31715 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31716 gen_rtx_LABEL_REF (VOIDmode, label),
31718 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31720 emit_jump_insn (temp);
31721 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31724 /* Output code to perform a log1p XFmode calculation. */
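/* The expansion below uses the identity log1p (x) = ln (2) * log2 (1 + x):
   for |x| below 1 - sqrt (2) / 2 (about 0.2929, the constant tested
   below) it uses fyl2xp1, which is accurate for small arguments;
   otherwise it forms 1 + x explicitly and uses fyl2x.  This is a
   summary of the code that follows.  */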
31726 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31728 rtx label1 = gen_label_rtx ();
31729 rtx label2 = gen_label_rtx ();
31731 rtx tmp = gen_reg_rtx (XFmode);
31732 rtx tmp2 = gen_reg_rtx (XFmode);
31735 emit_insn (gen_absxf2 (tmp, op1));
31736 test = gen_rtx_GE (VOIDmode, tmp,
31737 CONST_DOUBLE_FROM_REAL_VALUE (
31738 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31740 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31742 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31743 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31744 emit_jump (label2);
31746 emit_label (label1);
31747 emit_move_insn (tmp, CONST1_RTX (XFmode));
31748 emit_insn (gen_addxf3 (tmp, op1, tmp));
31749 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31750 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31752 emit_label (label2);
31755 /* Emit code for round calculation. */
31756 void ix86_emit_i387_round (rtx op0, rtx op1)
31758 enum machine_mode inmode = GET_MODE (op1);
31759 enum machine_mode outmode = GET_MODE (op0);
31760 rtx e1, e2, res, tmp, tmp1, half;
31761 rtx scratch = gen_reg_rtx (HImode);
31762 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
31763 rtx jump_label = gen_label_rtx ();
31765 rtx (*gen_abs) (rtx, rtx);
31766 rtx (*gen_neg) (rtx, rtx);
31771 gen_abs = gen_abssf2;
31774 gen_abs = gen_absdf2;
31777 gen_abs = gen_absxf2;
31780 gcc_unreachable ();
31786 gen_neg = gen_negsf2;
31789 gen_neg = gen_negdf2;
31792 gen_neg = gen_negxf2;
31795 gen_neg = gen_neghi2;
31798 gen_neg = gen_negsi2;
31801 gen_neg = gen_negdi2;
31804 gcc_unreachable ();
31807 e1 = gen_reg_rtx (inmode);
31808 e2 = gen_reg_rtx (inmode);
31809 res = gen_reg_rtx (outmode);
31811 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
31813 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
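/* E.g. round (-2.5): fabs gives 2.5, adding 0.5 gives 3.0, floor gives
   3.0, and restoring the sign yields -3; halfway cases round away from
   zero, unlike the rint-style sequences elsewhere in this file.  */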
31815 /* scratch = fxam(op1) */
31816 emit_insn (gen_rtx_SET (VOIDmode, scratch,
31817 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
31819 /* e1 = fabs(op1) */
31820 emit_insn (gen_abs (e1, op1));
31822 /* e2 = e1 + 0.5 */
31823 half = force_reg (inmode, half);
31824 emit_insn (gen_rtx_SET (VOIDmode, e2,
31825 gen_rtx_PLUS (inmode, e1, half)));
31827 /* res = floor(e2) */
31828 if (inmode != XFmode)
31830 tmp1 = gen_reg_rtx (XFmode);
31832 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
31833 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
31843 rtx tmp0 = gen_reg_rtx (XFmode);
31845 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
31847 emit_insn (gen_rtx_SET (VOIDmode, res,
31848 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
31849 UNSPEC_TRUNC_NOOP)));
31853 emit_insn (gen_frndintxf2_floor (res, tmp1));
31856 emit_insn (gen_lfloorxfhi2 (res, tmp1));
31859 emit_insn (gen_lfloorxfsi2 (res, tmp1));
31862 emit_insn (gen_lfloorxfdi2 (res, tmp1));
31865 gcc_unreachable ();
31868 /* flags = signbit(a) */
31869 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
31871 /* if (flags) then res = -res */
31872 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
31873 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
31874 gen_rtx_LABEL_REF (VOIDmode, jump_label),
31876 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31877 predict_jump (REG_BR_PROB_BASE * 50 / 100);
31878 JUMP_LABEL (insn) = jump_label;
31880 emit_insn (gen_neg (res, res));
31882 emit_label (jump_label);
31883 LABEL_NUSES (jump_label) = 1;
31885 emit_move_insn (op0, res);
31888 /* Output code to perform a Newton-Raphson approximation of a single precision
31889 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31891 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31893 rtx x0, x1, e0, e1;
31895 x0 = gen_reg_rtx (mode);
31896 e0 = gen_reg_rtx (mode);
31897 e1 = gen_reg_rtx (mode);
31898 x1 = gen_reg_rtx (mode);
31900 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
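/* This is one Newton-Raphson step for f (x) = 1/x - b, whose root is
   1/b:  x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), applied to
   the hardware estimate x0 = rcp (b).  */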
31902 /* x0 = rcp(b) estimate */
31903 emit_insn (gen_rtx_SET (VOIDmode, x0,
31904 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31907 emit_insn (gen_rtx_SET (VOIDmode, e0,
31908 gen_rtx_MULT (mode, x0, b)));
31911 emit_insn (gen_rtx_SET (VOIDmode, e0,
31912 gen_rtx_MULT (mode, x0, e0)));
31915 emit_insn (gen_rtx_SET (VOIDmode, e1,
31916 gen_rtx_PLUS (mode, x0, x0)));
31919 emit_insn (gen_rtx_SET (VOIDmode, x1,
31920 gen_rtx_MINUS (mode, e1, e0)));
31923 emit_insn (gen_rtx_SET (VOIDmode, res,
31924 gen_rtx_MULT (mode, a, x1)));
31927 /* Output code to perform a Newton-Raphson approximation of a
31928 single precision floating point [reciprocal] square root. */
31930 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31933 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31936 x0 = gen_reg_rtx (mode);
31937 e0 = gen_reg_rtx (mode);
31938 e1 = gen_reg_rtx (mode);
31939 e2 = gen_reg_rtx (mode);
31940 e3 = gen_reg_rtx (mode);
31942 real_from_integer (&r, VOIDmode, -3, -1, 0);
31943 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31945 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31946 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31948 if (VECTOR_MODE_P (mode))
31950 mthree = ix86_build_const_vector (mode, true, mthree);
31951 mhalf = ix86_build_const_vector (mode, true, mhalf);
31954 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31955 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
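/* Both identities are one Newton-Raphson step for f (x) = 1/(x*x) - a,
   whose root is 1/sqrt (a):  x1 = x0 * (3 - a * x0 * x0) / 2, rewritten
   as -0.5 * x0 * (a * x0 * x0 - 3.0) with x0 = rsqrtss (a).  Multiplying
   the result by a turns the reciprocal square root into sqrt (a).  */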
31957 /* x0 = rsqrt(a) estimate */
31958 emit_insn (gen_rtx_SET (VOIDmode, x0,
31959 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31962 /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt (0.0) does not produce a NaN. */
31967 zero = gen_reg_rtx (mode);
31968 mask = gen_reg_rtx (mode);
31970 zero = force_reg (mode, CONST0_RTX(mode));
31971 emit_insn (gen_rtx_SET (VOIDmode, mask,
31972 gen_rtx_NE (mode, zero, a)));
31974 emit_insn (gen_rtx_SET (VOIDmode, x0,
31975 gen_rtx_AND (mode, x0, mask)));
31979 emit_insn (gen_rtx_SET (VOIDmode, e0,
31980 gen_rtx_MULT (mode, x0, a)));
31982 emit_insn (gen_rtx_SET (VOIDmode, e1,
31983 gen_rtx_MULT (mode, e0, x0)));
31986 mthree = force_reg (mode, mthree);
31987 emit_insn (gen_rtx_SET (VOIDmode, e2,
31988 gen_rtx_PLUS (mode, e1, mthree)));
31990 mhalf = force_reg (mode, mhalf);
31992 /* e3 = -.5 * x0 */
31993 emit_insn (gen_rtx_SET (VOIDmode, e3,
31994 gen_rtx_MULT (mode, x0, mhalf)));
31996 /* e3 = -.5 * e0 */
31997 emit_insn (gen_rtx_SET (VOIDmode, e3,
31998 gen_rtx_MULT (mode, e0, mhalf)));
31999 /* ret = e2 * e3 */
32000 emit_insn (gen_rtx_SET (VOIDmode, res,
32001 gen_rtx_MULT (mode, e2, e3)));
32004 #ifdef TARGET_SOLARIS
32005 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32008 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32011 /* With Binutils 2.15, the "@unwind" marker must be specified on
32012 every occurrence of the ".eh_frame" section, not just the first one. */
32015 && strcmp (name, ".eh_frame") == 0)
32017 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32018 flags & SECTION_WRITE ? "aw" : "a");
32023 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
32025 solaris_elf_asm_comdat_section (name, flags, decl);
32030 default_elf_asm_named_section (name, flags, decl);
32032 #endif /* TARGET_SOLARIS */
32034 /* Return the mangling of TYPE if it is an extended fundamental type. */
32036 static const char *
32037 ix86_mangle_type (const_tree type)
32039 type = TYPE_MAIN_VARIANT (type);
32041 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32042 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32045 switch (TYPE_MODE (type))
32048 /* __float128 is "g". */
32051 /* "long double" or __float80 is "e". */
32058 /* For 32-bit code we can save PIC register setup by using
32059 __stack_chk_fail_local hidden function instead of calling
32060 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
32061 register, so it is better to call __stack_chk_fail directly. */
32063 static tree ATTRIBUTE_UNUSED
32064 ix86_stack_protect_fail (void)
32066 return TARGET_64BIT
32067 ? default_external_stack_protect_fail ()
32068 : default_hidden_stack_protect_fail ();
32071 /* Select a format to encode pointers in exception handling data. CODE
32072 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32073 true if the symbol may be affected by dynamic relocations.
32075 ??? All x86 object file formats are capable of representing this.
32076 After all, the relocation needed is the same as for the call insn.
32077 Whether or not a particular assembler allows us to enter such, I
32078 guess we'll have to see. */
32080 asm_preferred_eh_data_format (int code, int global)
32084 int type = DW_EH_PE_sdata8;
32086 || ix86_cmodel == CM_SMALL_PIC
32087 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
32088 type = DW_EH_PE_sdata4;
32089 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
32091 if (ix86_cmodel == CM_SMALL
32092 || (ix86_cmodel == CM_MEDIUM && code))
32093 return DW_EH_PE_udata4;
32094 return DW_EH_PE_absptr;
32097 /* Expand copysign from SIGN to the positive value ABS_VALUE
32098 storing in RESULT.  If MASK is non-null, it shall be a mask to mask out the sign bit. */
32101 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32103 enum machine_mode mode = GET_MODE (sign);
32104 rtx sgn = gen_reg_rtx (mode);
32105 if (mask == NULL_RTX)
32107 enum machine_mode vmode;
32109 if (mode == SFmode)
32111 else if (mode == DFmode)
32116 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32117 if (!VECTOR_MODE_P (mode))
32119 /* We need to generate a scalar mode mask in this case. */
32120 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32121 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32122 mask = gen_reg_rtx (mode);
32123 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32127 mask = gen_rtx_NOT (mode, mask);
32128 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32129 gen_rtx_AND (mode, mask, sign)));
32130 emit_insn (gen_rtx_SET (VOIDmode, result,
32131 gen_rtx_IOR (mode, abs_value, sgn)));
32134 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32136 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
32138 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32140 enum machine_mode vmode, mode = GET_MODE (op0);
32143 xa = gen_reg_rtx (mode);
32144 if (mode == SFmode)
32146 else if (mode == DFmode)
32150 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32151 if (!VECTOR_MODE_P (mode))
32153 /* We need to generate a scalar mode mask in this case. */
32154 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32155 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32156 mask = gen_reg_rtx (mode);
32157 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32159 emit_insn (gen_rtx_SET (VOIDmode, xa,
32160 gen_rtx_AND (mode, op0, mask)));
32168 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32169 swapping the operands if SWAP_OPERANDS is true. The expanded
32170 code is a forward jump to a newly created label in case the
32171 comparison is true. The generated label rtx is returned. */
32173 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32174 bool swap_operands)
32185 label = gen_label_rtx ();
32186 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32187 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32188 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32189 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32190 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32191 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32192 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32193 JUMP_LABEL (tmp) = label;
32198 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32199 using comparison code CODE. Operands are swapped for the comparison if
32200 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32202 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32203 bool swap_operands)
32205 rtx (*insn)(rtx, rtx, rtx, rtx);
32206 enum machine_mode mode = GET_MODE (op0);
32207 rtx mask = gen_reg_rtx (mode);
32216 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
32218 emit_insn (insn (mask, op0, op1,
32219 gen_rtx_fmt_ee (code, mode, op0, op1)));
32223 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32224 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
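/* E.g. ix86_gen_TWO52 (DFmode) yields 2**52 = 4503599627370496.0;
   adding and then subtracting this constant forces a double to be
   rounded to an integer, the trick used by the expanders below.  */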
32226 ix86_gen_TWO52 (enum machine_mode mode)
32228 REAL_VALUE_TYPE TWO52r;
32231 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32232 TWO52 = const_double_from_real_value (TWO52r, mode);
32233 TWO52 = force_reg (mode, TWO52);
32238 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
32241 ix86_expand_lround (rtx op0, rtx op1)
32243 /* C code for the stuff we're doing below:
32244 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
 return (long) tmp;  */
32247 enum machine_mode mode = GET_MODE (op1);
32248 const struct real_format *fmt;
32249 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32252 /* load nextafter (0.5, 0.0) */
32253 fmt = REAL_MODE_FORMAT (mode);
32254 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32255 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
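/* pred_half is now nextafter (0.5, 0.0), i.e. 0.5 - 2**(-p-1) for a
   p-bit mantissa; real_2expN above supplies the 2**(-p-1) term.  */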
32257 /* adj = copysign (0.5, op1) */
32258 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32259 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32261 /* adj = op1 + adj */
32262 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32264 /* op0 = (imode)adj */
32265 expand_fix (op0, adj, 0);
32268 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing into OP0. */
32271 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32273 /* C code for the stuff we're doing below (for do_floor):
32275 xi -= (double)xi > op1 ? 1 : 0;
32278 enum machine_mode fmode = GET_MODE (op1);
32279 enum machine_mode imode = GET_MODE (op0);
32280 rtx ireg, freg, label, tmp;
32282 /* reg = (long)op1 */
32283 ireg = gen_reg_rtx (imode);
32284 expand_fix (ireg, op1, 0);
32286 /* freg = (double)reg */
32287 freg = gen_reg_rtx (fmode);
32288 expand_float (freg, ireg, 0);
32290 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32291 label = ix86_expand_sse_compare_and_jump (UNLE,
32292 freg, op1, !do_floor);
32293 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32294 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32295 emit_move_insn (ireg, tmp);
32297 emit_label (label);
32298 LABEL_NUSES (label) = 1;
32300 emit_move_insn (op0, ireg);
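/* Illustrative sketch of the sequence above, floor case:

       long lfloor_sketch (double x)
       {
         long xi = (long) x;      (expand_fix truncates toward zero)
         if ((double) xi > x)     (x negative and non-integral)
           xi -= 1;
         return xi;
       }

   For ceil the comparison operands are swapped and 1 is added.  */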
32303 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32304 result in OPERAND0. */
32306 ix86_expand_rint (rtx operand0, rtx operand1)
32308 /* C code for the stuff we're doing below:
32309 xa = fabs (operand1);
32310 if (!isless (xa, 2**52))
32312 xa = xa + 2**52 - 2**52;
32313 return copysign (xa, operand1);
32315 enum machine_mode mode = GET_MODE (operand0);
32316 rtx res, xa, label, TWO52, mask;
32318 res = gen_reg_rtx (mode);
32319 emit_move_insn (res, operand1);
32321 /* xa = abs (operand1) */
32322 xa = ix86_expand_sse_fabs (res, &mask);
32324 /* if (!isless (xa, TWO52)) goto label; */
32325 TWO52 = ix86_gen_TWO52 (mode);
32326 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32328 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32329 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32331 ix86_sse_copysign_to_positive (res, xa, res, mask);
32333 emit_label (label);
32334 LABEL_NUSES (label) = 1;
32336 emit_move_insn (operand0, res);
32339 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
32342 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32344 /* C code for the stuff we expand below.
32345 double xa = fabs (x), x2;
32346 if (!isless (xa, TWO52))
32348 xa = xa + TWO52 - TWO52;
32349 x2 = copysign (xa, x);
32358 enum machine_mode mode = GET_MODE (operand0);
32359 rtx xa, TWO52, tmp, label, one, res, mask;
32361 TWO52 = ix86_gen_TWO52 (mode);
32363 /* Temporary for holding the result, initialized to the input
32364 operand to ease control flow. */
32365 res = gen_reg_rtx (mode);
32366 emit_move_insn (res, operand1);
32368 /* xa = abs (operand1) */
32369 xa = ix86_expand_sse_fabs (res, &mask);
32371 /* if (!isless (xa, TWO52)) goto label; */
32372 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32374 /* xa = xa + TWO52 - TWO52; */
32375 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32376 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32378 /* xa = copysign (xa, operand1) */
32379 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32381 /* generate 1.0 or -1.0 */
32382 one = force_reg (mode,
32383 const_double_from_real_value (do_floor
32384 ? dconst1 : dconstm1, mode));
32386 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32387 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32388 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32389 gen_rtx_AND (mode, one, tmp)));
32390 /* We always need to subtract here to preserve signed zero. */
32391 tmp = expand_simple_binop (mode, MINUS,
32392 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32393 emit_move_insn (res, tmp);
32395 emit_label (label);
32396 LABEL_NUSES (label) = 1;
32398 emit_move_insn (operand0, res);
32401 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
32404 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32406 /* C code for the stuff we expand below.
32407 double xa = fabs (x), x2;
32408 if (!isless (xa, TWO52))
32410 x2 = (double)(long)x;
32417 if (HONOR_SIGNED_ZEROS (mode))
32418 return copysign (x2, x);
32421 enum machine_mode mode = GET_MODE (operand0);
32422 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32424 TWO52 = ix86_gen_TWO52 (mode);
32426 /* Temporary for holding the result, initialized to the input
32427 operand to ease control flow. */
32428 res = gen_reg_rtx (mode);
32429 emit_move_insn (res, operand1);
32431 /* xa = abs (operand1) */
32432 xa = ix86_expand_sse_fabs (res, &mask);
32434 /* if (!isless (xa, TWO52)) goto label; */
32435 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32437 /* xa = (double)(long)x */
32438 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32439 expand_fix (xi, res, 0);
32440 expand_float (xa, xi, 0);
32443 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32445 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32446 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32447 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32448 gen_rtx_AND (mode, one, tmp)));
32449 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32450 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32451 emit_move_insn (res, tmp);
32453 if (HONOR_SIGNED_ZEROS (mode))
32454 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32456 emit_label (label);
32457 LABEL_NUSES (label) = 1;
32459 emit_move_insn (operand0, res);
32462 /* Expand SSE sequence for computing round from OPERAND1 storing
32463 into OPERAND0. This sequence works without relying on DImode truncation
32464 via cvttsd2siq, which is only available on 64-bit targets. */
32466 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32468 /* C code for the stuff we expand below.
32469 double xa = fabs (x), xa2, x2;
32470 if (!isless (xa, TWO52))
32472 Using the absolute value and copying back sign makes
32473 -0.0 -> -0.0 correct.
32474 xa2 = xa + TWO52 - TWO52;
32479 else if (dxa > 0.5)
32481 x2 = copysign (xa2, x);
32484 enum machine_mode mode = GET_MODE (operand0);
32485 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32487 TWO52 = ix86_gen_TWO52 (mode);
32489 /* Temporary for holding the result, initialized to the input
32490 operand to ease control flow. */
32491 res = gen_reg_rtx (mode);
32492 emit_move_insn (res, operand1);
32494 /* xa = abs (operand1) */
32495 xa = ix86_expand_sse_fabs (res, &mask);
32497 /* if (!isless (xa, TWO52)) goto label; */
32498 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32500 /* xa2 = xa + TWO52 - TWO52; */
32501 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32502 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32504 /* dxa = xa2 - xa; */
32505 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32507 /* generate 0.5, 1.0 and -0.5 */
32508 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32509 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32510 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32514 tmp = gen_reg_rtx (mode);
32515 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32516 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32517 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32518 gen_rtx_AND (mode, one, tmp)));
32519 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32520 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32521 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32522 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32523 gen_rtx_AND (mode, one, tmp)));
32524 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32526 /* res = copysign (xa2, operand1) */
32527 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32529 emit_label (label);
32530 LABEL_NUSES (label) = 1;
32532 emit_move_insn (operand0, res);
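/* Illustrative sketch of the net effect of the sequence above,
   rounding halfway cases away from zero without any 64-bit fix/float:

       double round_sketch (double x)
       {
         double xa = fabs (x);
         if (!isless (xa, 0x1p52))
           return x;
         double xa2 = (xa + 0x1p52) - 0x1p52;   (nearest, ties to even)
         double dxa = xa2 - xa;
         if (dxa > 0.5)
           xa2 -= 1.0;            (rounded up too far)
         else if (dxa <= -0.5)
           xa2 += 1.0;            (rounded down too far)
         return copysign (xa2, x);
       }
*/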
32535 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
32538 ix86_expand_trunc (rtx operand0, rtx operand1)
32540 /* C code for SSE variant we expand below.
32541 double xa = fabs (x), x2;
32542 if (!isless (xa, TWO52))
32544 x2 = (double)(long)x;
32545 if (HONOR_SIGNED_ZEROS (mode))
32546 return copysign (x2, x);
32549 enum machine_mode mode = GET_MODE (operand0);
32550 rtx xa, xi, TWO52, label, res, mask;
32552 TWO52 = ix86_gen_TWO52 (mode);
32554 /* Temporary for holding the result, initialized to the input
32555 operand to ease control flow. */
32556 res = gen_reg_rtx (mode);
32557 emit_move_insn (res, operand1);
32559 /* xa = abs (operand1) */
32560 xa = ix86_expand_sse_fabs (res, &mask);
32562 /* if (!isless (xa, TWO52)) goto label; */
32563 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32565 /* x = (double)(long)x */
32566 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32567 expand_fix (xi, res, 0);
32568 expand_float (res, xi, 0);
32570 if (HONOR_SIGNED_ZEROS (mode))
32571 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32573 emit_label (label);
32574 LABEL_NUSES (label) = 1;
32576 emit_move_insn (operand0, res);
32579 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
32582 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32584 enum machine_mode mode = GET_MODE (operand0);
32585 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32587 /* C code for SSE variant we expand below.
32588 double xa = fabs (x), x2;
32589 if (!isless (xa, TWO52))
32591 xa2 = xa + TWO52 - TWO52;
32595 x2 = copysign (xa2, x);
32599 TWO52 = ix86_gen_TWO52 (mode);
32601 /* Temporary for holding the result, initialized to the input
32602 operand to ease control flow. */
32603 res = gen_reg_rtx (mode);
32604 emit_move_insn (res, operand1);
32606 /* xa = abs (operand1) */
32607 xa = ix86_expand_sse_fabs (res, &smask);
32609 /* if (!isless (xa, TWO52)) goto label; */
32610 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32612 /* res = xa + TWO52 - TWO52; */
32613 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32614 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32615 emit_move_insn (res, tmp);
32618 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32620 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32621 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32622 emit_insn (gen_rtx_SET (VOIDmode, mask,
32623 gen_rtx_AND (mode, mask, one)));
32624 tmp = expand_simple_binop (mode, MINUS,
32625 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32626 emit_move_insn (res, tmp);
32628 /* res = copysign (res, operand1) */
32629 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32631 emit_label (label);
32632 LABEL_NUSES (label) = 1;
32634 emit_move_insn (operand0, res);
32637 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0. */
32640 ix86_expand_round (rtx operand0, rtx operand1)
32642 /* C code for the stuff we're doing below:
32643 double xa = fabs (x);
32644 if (!isless (xa, TWO52))
32646 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32647 return copysign (xa, x);
32649 enum machine_mode mode = GET_MODE (operand0);
32650 rtx res, TWO52, xa, label, xi, half, mask;
32651 const struct real_format *fmt;
32652 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32654 /* Temporary for holding the result, initialized to the input
32655 operand to ease control flow. */
32656 res = gen_reg_rtx (mode);
32657 emit_move_insn (res, operand1);
32659 TWO52 = ix86_gen_TWO52 (mode);
32660 xa = ix86_expand_sse_fabs (res, &mask);
32661 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32663 /* load nextafter (0.5, 0.0) */
32664 fmt = REAL_MODE_FORMAT (mode);
32665 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32666 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32668 /* xa = xa + 0.5 */
32669 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32670 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32672 /* xa = (double)(int64_t)xa */
32673 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32674 expand_fix (xi, xa, 0);
32675 expand_float (xa, xi, 0);
32677 /* res = copysign (xa, operand1) */
32678 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32680 emit_label (label);
32681 LABEL_NUSES (label) = 1;
32683 emit_move_insn (operand0, res);
32686 /* Expand SSE sequence for computing round
32687 from OP1 storing into OP0 using sse4 round insn. */
32689 ix86_expand_round_sse4 (rtx op0, rtx op1)
32691 enum machine_mode mode = GET_MODE (op0);
32692 rtx e1, e2, e3, res, half, mask;
32693 const struct real_format *fmt;
32694 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32695 rtx (*gen_round) (rtx, rtx, rtx);
32700 gen_round = gen_sse4_1_roundsf2;
32703 gen_round = gen_sse4_1_rounddf2;
32706 gcc_unreachable ();
32709 /* e1 = fabs(op1) */
32710 e1 = ix86_expand_sse_fabs (op1, &mask);
32712 /* load nextafter (0.5, 0.0) */
32713 fmt = REAL_MODE_FORMAT (mode);
32714 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32715 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32717 /* e2 = e1 + 0.5 */
32718 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32719 e2 = expand_simple_binop (mode, PLUS, e1, half, NULL_RTX, 0, OPTAB_DIRECT);
32721 /* e3 = trunc(e2) */
32722 e3 = gen_reg_rtx (mode);
32723 emit_insn (gen_round (e3, e2, GEN_INT (ROUND_TRUNC)));
32725 /* res = copysign (e3, op1) */
32726 res = gen_reg_rtx (mode);
32727 ix86_sse_copysign_to_positive (res, e3, op1, mask);
32729 emit_move_insn (op0, res);
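/* Illustrative sketch: with SSE4.1 the whole expansion above reduces to

       double round_sse4_sketch (double x)
       {
         double e2 = fabs (x) + nextafter (0.5, 0.0);
         return copysign (trunc (e2), x);
       }

   where trunc is a single roundsd/roundss using the ROUND_TRUNC
   immediate.  */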
32733 /* Table of valid machine attributes. */
32734 static const struct attribute_spec ix86_attribute_table[] =
32736 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32737 affects_type_identity } */
32738 /* Stdcall attribute says callee is responsible for popping arguments
32739 if they are not variable. */
32740 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32742 /* Fastcall attribute says callee is responsible for popping arguments
32743 if they are not variable. */
32744 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32746 /* Thiscall attribute says callee is responsible for popping arguments
32747 if they are not variable. */
32748 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32750 /* Cdecl attribute says the callee is a normal C declaration */
32751 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32753 /* Regparm attribute specifies how many integer arguments are to be
32754 passed in registers. */
32755 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32757 /* Sseregparm attribute says we are using x86_64 calling conventions
32758 for FP arguments. */
32759 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32761 /* force_align_arg_pointer says this function realigns the stack at entry. */
32762 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32763 false, true, true, ix86_handle_cconv_attribute, false },
32764 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32765 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32766 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32767 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32770 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32772 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32774 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32775 SUBTARGET_ATTRIBUTE_TABLE,
32777 /* ms_abi and sysv_abi calling convention function attributes. */
32778 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32779 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32780 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32782 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32783 ix86_handle_callee_pop_aggregate_return, true },
32785 { NULL, 0, 0, false, false, false, NULL, false }
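/* Illustrative examples of how these attributes appear in user code:

       int __attribute__ ((fastcall)) f (int a, int b);
       int __attribute__ ((regparm (3))) g (int a, int b, int c);
       struct s { char c; int i; } __attribute__ ((ms_struct));

   fastcall passes the first two integer arguments in ecx/edx;
   regparm (3) passes the first three in eax/edx/ecx.  */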
32788 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32790 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32791 tree vectype ATTRIBUTE_UNUSED,
32792 int misalign ATTRIBUTE_UNUSED)
32794 switch (type_of_cost)
32797 return ix86_cost->scalar_stmt_cost;
32800 return ix86_cost->scalar_load_cost;
32803 return ix86_cost->scalar_store_cost;
32806 return ix86_cost->vec_stmt_cost;
32809 return ix86_cost->vec_align_load_cost;
32812 return ix86_cost->vec_store_cost;
32814 case vec_to_scalar:
32815 return ix86_cost->vec_to_scalar_cost;
32817 case scalar_to_vec:
32818 return ix86_cost->scalar_to_vec_cost;
32820 case unaligned_load:
32821 case unaligned_store:
32822 return ix86_cost->vec_unalign_load_cost;
32824 case cond_branch_taken:
32825 return ix86_cost->cond_taken_branch_cost;
32827 case cond_branch_not_taken:
32828 return ix86_cost->cond_not_taken_branch_cost;
32834 gcc_unreachable ();
32839 /* Implement targetm.vectorize.builtin_vec_perm. */
32842 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32844 tree itype = TREE_TYPE (vec_type);
32845 bool u = TYPE_UNSIGNED (itype);
32846 enum machine_mode vmode = TYPE_MODE (vec_type);
32847 enum ix86_builtins fcode;
32848 bool ok = TARGET_SSE2;
32854 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32857 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32859 itype = ix86_get_builtin_type (IX86_BT_DI);
32864 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32868 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32870 itype = ix86_get_builtin_type (IX86_BT_SI);
32874 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32877 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32880 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32883 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32893 *mask_type = itype;
32894 return ix86_builtins[(int) fcode];
32897 /* Return a vector mode with twice as many elements as VMODE. */
32898 /* ??? Consider moving this to a table generated by genmodes.c. */
32900 static enum machine_mode
32901 doublesize_vector_mode (enum machine_mode vmode)
32905 case V2SFmode: return V4SFmode;
32906 case V1DImode: return V2DImode;
32907 case V2SImode: return V4SImode;
32908 case V4HImode: return V8HImode;
32909 case V8QImode: return V16QImode;
32911 case V2DFmode: return V4DFmode;
32912 case V4SFmode: return V8SFmode;
32913 case V2DImode: return V4DImode;
32914 case V4SImode: return V8SImode;
32915 case V8HImode: return V16HImode;
32916 case V16QImode: return V32QImode;
32918 case V4DFmode: return V8DFmode;
32919 case V8SFmode: return V16SFmode;
32920 case V4DImode: return V8DImode;
32921 case V8SImode: return V16SImode;
32922 case V16HImode: return V32HImode;
32923 case V32QImode: return V64QImode;
32926 gcc_unreachable ();
32930 /* Construct (set target (vec_select op0 (parallel perm))) and
32931 return true if that's a valid instruction in the active ISA. */
32934 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32936 rtx rperm[MAX_VECT_LEN], x;
32939 for (i = 0; i < nelt; ++i)
32940 rperm[i] = GEN_INT (perm[i]);
32942 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32943 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32944 x = gen_rtx_SET (VOIDmode, target, x);
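/* Illustrative example: for perm = {1,0,3,2} on V4SF the rtx built
   above is

       (set (reg:V4SF target)
            (vec_select:V4SF (reg:V4SF op0)
                             (parallel [(const_int 1) (const_int 0)
                                        (const_int 3) (const_int 2)])))

   which recog_memoized below checks against the shuffle patterns
   in sse.md.  */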
32947 if (recog_memoized (x) < 0)
32955 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32958 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32959 const unsigned char *perm, unsigned nelt)
32961 enum machine_mode v2mode;
32964 v2mode = doublesize_vector_mode (GET_MODE (op0));
32965 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32966 return expand_vselect (target, x, perm, nelt);
32969 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32970 in terms of blendp[sd] / pblendw / pblendvb. */
32973 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32975 enum machine_mode vmode = d->vmode;
32976 unsigned i, mask, nelt = d->nelt;
32977 rtx target, op0, op1, x;
32979 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32981 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32984 /* This is a blend, not a permute. Elements must stay in their
32985 respective lanes. */
32986 for (i = 0; i < nelt; ++i)
32988 unsigned e = d->perm[i];
32989 if (!(e == i || e == i + nelt))
32996 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32997 decision should be extracted elsewhere, so that we only try that
32998 sequence once all budget==3 options have been tried. */
33000 /* For bytes, see if bytes move in pairs so we can use pblendw with
33001 an immediate argument, rather than pblendvb with a vector argument. */
33002 if (vmode == V16QImode)
33004 bool pblendw_ok = true;
33005 for (i = 0; i < 16 && pblendw_ok; i += 2)
33006 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
33010 rtx rperm[16], vperm;
33012 for (i = 0; i < nelt; ++i)
33013 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
33015 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33016 vperm = force_reg (V16QImode, vperm);
33018 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33023 target = d->target;
33035 for (i = 0; i < nelt; ++i)
33036 mask |= (d->perm[i] >= nelt) << i;
33040 for (i = 0; i < 2; ++i)
33041 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33045 for (i = 0; i < 4; ++i)
33046 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33050 for (i = 0; i < 8; ++i)
33051 mask |= (d->perm[i * 2] >= 16) << i;
33055 target = gen_lowpart (vmode, target);
33056 op0 = gen_lowpart (vmode, op0);
33057 op1 = gen_lowpart (vmode, op1);
33061 gcc_unreachable ();
33064 /* This matches five different patterns with the different modes. */
33065 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33066 x = gen_rtx_SET (VOIDmode, target, x);
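/* Worked example: for V4SF with perm = {0, 5, 2, 7}, elements 1 and 3
   come from op1, so the generic mask loop above computes
   mask = (1 << 1) | (1 << 3) = 0xa and the VEC_MERGE becomes
   blendps $0xa.  */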
33072 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33073 in terms of the variable form of vpermilps.
33075 Note that we will have already failed the immediate input vpermilps,
33076 which requires that the high and low part shuffle be identical; the
33077 variable form doesn't require that. */
33080 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33082 rtx rperm[8], vperm;
33085 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33088 /* We can only permute within the 128-bit lane. */
33089 for (i = 0; i < 8; ++i)
33091 unsigned e = d->perm[i];
33092 if (i < 4 ? e >= 4 : e < 4)
33099 for (i = 0; i < 8; ++i)
33101 unsigned e = d->perm[i];
33103 /* Within each 128-bit lane, the elements of op0 are numbered
33104 from 0 and the elements of op1 are numbered from 4. */
33110 rperm[i] = GEN_INT (e);
33113 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33114 vperm = force_reg (V8SImode, vperm);
33115 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
33120 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33121 in terms of pshufb or vpperm. */
33124 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33126 unsigned i, nelt, eltsz;
33127 rtx rperm[16], vperm, target, op0, op1;
33129 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33131 if (GET_MODE_SIZE (d->vmode) != 16)
33138 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33140 for (i = 0; i < nelt; ++i)
33142 unsigned j, e = d->perm[i];
33143 for (j = 0; j < eltsz; ++j)
33144 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33147 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33148 vperm = force_reg (V16QImode, vperm);
33150 target = gen_lowpart (V16QImode, d->target);
33151 op0 = gen_lowpart (V16QImode, d->op0);
33152 if (d->op0 == d->op1)
33153 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33156 op1 = gen_lowpart (V16QImode, d->op1);
33157 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
33163 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33164 in a single instruction. */
33167 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33169 unsigned i, nelt = d->nelt;
33170 unsigned char perm2[MAX_VECT_LEN];
33172 /* Check plain VEC_SELECT first, because AVX has instructions that could
33173 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33174 input where SEL+CONCAT may not. */
33175 if (d->op0 == d->op1)
33177 int mask = nelt - 1;
33179 for (i = 0; i < nelt; i++)
33180 perm2[i] = d->perm[i] & mask;
33182 if (expand_vselect (d->target, d->op0, perm2, nelt))
33185 /* There are plenty of patterns in sse.md that are written for
33186 SEL+CONCAT and are not replicated for a single op. Perhaps
33187 that should be changed, to avoid the nastiness here. */
33189 /* Recognize interleave style patterns, which means incrementing
33190 every other permutation operand. */
33191 for (i = 0; i < nelt; i += 2)
33193 perm2[i] = d->perm[i] & mask;
33194 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33196 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33199 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33202 for (i = 0; i < nelt; i += 4)
33204 perm2[i + 0] = d->perm[i + 0] & mask;
33205 perm2[i + 1] = d->perm[i + 1] & mask;
33206 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33207 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33210 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33215 /* Finally, try the fully general two operand permute. */
33216 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33219 /* Recognize interleave style patterns with reversed operands. */
33220 if (d->op0 != d->op1)
33222 for (i = 0; i < nelt; ++i)
33224 unsigned e = d->perm[i];
33232 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33236 /* Try the SSE4.1 blend variable merge instructions. */
33237 if (expand_vec_perm_blend (d))
33240 /* Try one of the AVX vpermil variable permutations. */
33241 if (expand_vec_perm_vpermil (d))
33244 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33245 if (expand_vec_perm_pshufb (d))
33251 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33252 in terms of a pair of pshuflw + pshufhw instructions. */
33255 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33257 unsigned char perm2[MAX_VECT_LEN];
33261 if (d->vmode != V8HImode || d->op0 != d->op1)
33264 /* The two permutations only operate in 64-bit lanes. */
33265 for (i = 0; i < 4; ++i)
33266 if (d->perm[i] >= 4)
33268 for (i = 4; i < 8; ++i)
33269 if (d->perm[i] < 4)
33275 /* Emit the pshuflw. */
33276 memcpy (perm2, d->perm, 4);
33277 for (i = 4; i < 8; ++i)
33279 ok = expand_vselect (d->target, d->op0, perm2, 8);
33282 /* Emit the pshufhw. */
33283 memcpy (perm2 + 4, d->perm + 4, 4);
33284 for (i = 0; i < 4; ++i)
33286 ok = expand_vselect (d->target, d->target, perm2, 8);
33292 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33293 the permutation using the SSSE3 palignr instruction. This succeeds
33294 when all of the elements in PERM fit within one vector and we merely
33295 need to shift them down so that a single vector permutation has a
33296 chance to succeed. */
33299 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33301 unsigned i, nelt = d->nelt;
33306 /* Even with AVX, palignr only operates on 128-bit vectors. */
33307 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33310 min = nelt, max = 0;
33311 for (i = 0; i < nelt; ++i)
33313 unsigned e = d->perm[i];
33319 if (min == 0 || max - min >= nelt)
33322 /* Given that we have SSSE3, we know we'll be able to implement the
33323 single operand permutation after the palignr with pshufb. */
33327 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33328 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33329 gen_lowpart (TImode, d->op1),
33330 gen_lowpart (TImode, d->op0), shift));
33332 d->op0 = d->op1 = d->target;
33335 for (i = 0; i < nelt; ++i)
33337 unsigned e = d->perm[i] - min;
33343 /* Test for the degenerate case where the alignment by itself
33344 produces the desired permutation. */
33348 ok = expand_vec_perm_1 (d);
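/* Worked example: for V16QI with perm[i] = i + 3 (bytes 3..18 of the
   op1:op0 pair), min == 3, so the palignr above shifts the
   concatenation right by 3 bytes; the residual permutation is then
   the identity and no further shuffle is needed.  */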
33354 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33355 a two vector permutation into a single vector permutation by using
33356 an interleave operation to merge the vectors. */
33359 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33361 struct expand_vec_perm_d dremap, dfinal;
33362 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33363 unsigned contents, h1, h2, h3, h4;
33364 unsigned char remap[2 * MAX_VECT_LEN];
33368 if (d->op0 == d->op1)
33371 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33372 lanes. We can use similar techniques with the vperm2f128 instruction,
33373 but it requires slightly different logic. */
33374 if (GET_MODE_SIZE (d->vmode) != 16)
33377 /* Examine whence the elements come. */
33379 for (i = 0; i < nelt; ++i)
33380 contents |= 1u << d->perm[i];
33382 /* Split the two input vectors into 4 halves. */
33383 h1 = (1u << nelt2) - 1;
33388 memset (remap, 0xff, sizeof (remap));
33391 /* If all the elements come from the low halves, use interleave low; similarly
33392 for interleave high. If the elements come from mismatched halves, we
33393 can use shufps for V4SF/V4SI or do a DImode shuffle. */
33394 if ((contents & (h1 | h3)) == contents)
33396 for (i = 0; i < nelt2; ++i)
33399 remap[i + nelt] = i * 2 + 1;
33400 dremap.perm[i * 2] = i;
33401 dremap.perm[i * 2 + 1] = i + nelt;
33404 else if ((contents & (h2 | h4)) == contents)
33406 for (i = 0; i < nelt2; ++i)
33408 remap[i + nelt2] = i * 2;
33409 remap[i + nelt + nelt2] = i * 2 + 1;
33410 dremap.perm[i * 2] = i + nelt2;
33411 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33414 else if ((contents & (h1 | h4)) == contents)
33416 for (i = 0; i < nelt2; ++i)
33419 remap[i + nelt + nelt2] = i + nelt2;
33420 dremap.perm[i] = i;
33421 dremap.perm[i + nelt2] = i + nelt + nelt2;
33425 dremap.vmode = V2DImode;
33427 dremap.perm[0] = 0;
33428 dremap.perm[1] = 3;
33431 else if ((contents & (h2 | h3)) == contents)
33433 for (i = 0; i < nelt2; ++i)
33435 remap[i + nelt2] = i;
33436 remap[i + nelt] = i + nelt2;
33437 dremap.perm[i] = i + nelt2;
33438 dremap.perm[i + nelt2] = i + nelt;
33442 dremap.vmode = V2DImode;
33444 dremap.perm[0] = 1;
33445 dremap.perm[1] = 2;
33451 /* Use the remapping array set up above to move the elements from their
33452 swizzled locations into their final destinations. */
33454 for (i = 0; i < nelt; ++i)
33456 unsigned e = remap[d->perm[i]];
33457 gcc_assert (e < nelt);
33458 dfinal.perm[i] = e;
33460 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33461 dfinal.op1 = dfinal.op0;
33462 dremap.target = dfinal.op0;
33464 /* Test if the final remap can be done with a single insn. For V4SFmode or
33465 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33467 ok = expand_vec_perm_1 (&dfinal);
33468 seq = get_insns ();
33474 if (dremap.vmode != dfinal.vmode)
33476 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33477 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33478 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33481 ok = expand_vec_perm_1 (&dremap);
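/* Worked example: for V4SI with perm = {0, 4, 1, 5}, contents == 0x33,
   which is exactly h1 | h3 (all elements from the low halves), so
   dremap becomes the interleave-low {0, 4, 1, 5} and dfinal collapses
   to the identity {0, 1, 2, 3}; a single punpckldq suffices.  */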
33488 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33489 permutation with two pshufb insns and an ior. We should have already
33490 failed all two instruction sequences. */
33493 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33495 rtx rperm[2][16], vperm, l, h, op, m128;
33496 unsigned int i, nelt, eltsz;
33498 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33500 gcc_assert (d->op0 != d->op1);
33503 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33505 /* Generate two permutation masks. If the required element is within
33506 the given vector it is shuffled into the proper lane. If the required
33507 element is in the other vector, force a zero into the lane by setting
33508 bit 7 in the permutation mask. */
33509 m128 = GEN_INT (-128);
33510 for (i = 0; i < nelt; ++i)
33512 unsigned j, e = d->perm[i];
33513 unsigned which = (e >= nelt);
33517 for (j = 0; j < eltsz; ++j)
33519 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33520 rperm[1-which][i*eltsz + j] = m128;
33524 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33525 vperm = force_reg (V16QImode, vperm);
33527 l = gen_reg_rtx (V16QImode);
33528 op = gen_lowpart (V16QImode, d->op0);
33529 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33531 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33532 vperm = force_reg (V16QImode, vperm);
33534 h = gen_reg_rtx (V16QImode);
33535 op = gen_lowpart (V16QImode, d->op1);
33536 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33538 op = gen_lowpart (V16QImode, d->target);
33539 emit_insn (gen_iorv16qi3 (op, l, h));
33544 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33545 and extract-odd permutations. */
33548 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33555 t1 = gen_reg_rtx (V4DFmode);
33556 t2 = gen_reg_rtx (V4DFmode);
33558 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33559 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33560 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33562 /* Now an unpck[lh]pd will produce the result required. */
33564 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33566 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33572 int mask = odd ? 0xdd : 0x88;
33574 t1 = gen_reg_rtx (V8SFmode);
33575 t2 = gen_reg_rtx (V8SFmode);
33576 t3 = gen_reg_rtx (V8SFmode);
33578 /* Shuffle within the 128-bit lanes to produce:
33579 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33580 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33583 /* Shuffle the lanes around to produce:
33584 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33585 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33588 /* Shuffle within the 128-bit lanes to produce:
33589 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33590 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33592 /* Shuffle within the 128-bit lanes to produce:
33593 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33594 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33596 /* Shuffle the lanes around to produce:
33597 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33598 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33607 /* These are always directly implementable by expand_vec_perm_1. */
33608 gcc_unreachable ();
33612 return expand_vec_perm_pshufb2 (d);
33615 /* We need 2*log2(N)-1 operations to achieve odd/even
33616 with interleave. */
33617 t1 = gen_reg_rtx (V8HImode);
33618 t2 = gen_reg_rtx (V8HImode);
33619 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33620 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33621 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33622 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33624 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33626 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33633 return expand_vec_perm_pshufb2 (d);
33636 t1 = gen_reg_rtx (V16QImode);
33637 t2 = gen_reg_rtx (V16QImode);
33638 t3 = gen_reg_rtx (V16QImode);
33639 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33640 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33641 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33642 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33643 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33644 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33646 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33648 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33654 gcc_unreachable ();
33660 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33661 extract-even and extract-odd permutations. */
33664 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33666 unsigned i, odd, nelt = d->nelt;
33669 if (odd != 0 && odd != 1)
33672 for (i = 1; i < nelt; ++i)
33673 if (d->perm[i] != 2 * i + odd)
33676 return expand_vec_perm_even_odd_1 (d, odd);
33679 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33680 permutations. We assume that expand_vec_perm_1 has already failed. */
33683 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33685 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33686 enum machine_mode vmode = d->vmode;
33687 unsigned char perm2[4];
33695 /* These are special-cased in sse.md so that we can optionally
33696 use the vbroadcast instruction. They expand to two insns
33697 if the input happens to be in a register. */
33698 gcc_unreachable ();
33704 /* These are always implementable using standard shuffle patterns. */
33705 gcc_unreachable ();
33709 /* These can be implemented via interleave. We save one insn by
33710 stopping once we have promoted to V4SImode and then use pshufd. */
33713 optab otab = vec_interleave_low_optab;
33717 otab = vec_interleave_high_optab;
33722 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33723 vmode = get_mode_wider_vector (vmode);
33724 op0 = gen_lowpart (vmode, op0);
33726 while (vmode != V4SImode);
33728 memset (perm2, elt, 4);
33729 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33734 gcc_unreachable ();
33738 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33739 broadcast permutations. */
33742 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33744 unsigned i, elt, nelt = d->nelt;
33746 if (d->op0 != d->op1)
33750 for (i = 1; i < nelt; ++i)
33751 if (d->perm[i] != elt)
33754 return expand_vec_perm_broadcast_1 (d);
33757 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33758 With all of the interface bits taken care of, perform the expansion
33759 in D and return true on success. */
33762 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33764 /* Try a single instruction expansion. */
33765 if (expand_vec_perm_1 (d))
33768 /* Try sequences of two instructions. */
33770 if (expand_vec_perm_pshuflw_pshufhw (d))
33773 if (expand_vec_perm_palignr (d))
33776 if (expand_vec_perm_interleave2 (d))
33779 if (expand_vec_perm_broadcast (d))
33782 /* Try sequences of three instructions. */
33784 if (expand_vec_perm_pshufb2 (d))
33787 /* ??? Look for narrow permutations whose element orderings would
33788 allow the promotion to a wider mode. */
33790 /* ??? Look for sequences of interleave or a wider permute that place
33791 the data into the correct lanes for a half-vector shuffle like
33792 pshuf[lh]w or vpermilps. */
33794 /* ??? Look for sequences of interleave that produce the desired results.
33795 The combinatorics of punpck[lh] get pretty ugly... */
33797 if (expand_vec_perm_even_odd (d))
33803 /* Extract the values from the vector CST into the permutation array in D.
33804 Return 0 on error, 1 if all values from the permutation come from the
33805 first vector, 2 if all values from the second vector, and 3 otherwise. */
33808 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33810 tree list = TREE_VECTOR_CST_ELTS (cst);
33811 unsigned i, nelt = d->nelt;
33814 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33816 unsigned HOST_WIDE_INT e;
33818 if (!host_integerp (TREE_VALUE (list), 1))
33820 e = tree_low_cst (TREE_VALUE (list), 1);
33824 ret |= (e < nelt ? 1 : 2);
33827 gcc_assert (list == NULL);
33829 /* If all elements come from the second vector, fold them onto the first. */
33831 for (i = 0; i < nelt; ++i)
33832 d->perm[i] -= nelt;
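/* Worked example: for nelt == 4 and the constant {4, 5, 6, 7}, every
   element selects from the second vector, so the return value is 2
   and the loop above folds the permutation to {0, 1, 2, 3}.  */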
33838 ix86_expand_vec_perm_builtin (tree exp)
33840 struct expand_vec_perm_d d;
33841 tree arg0, arg1, arg2;
33843 arg0 = CALL_EXPR_ARG (exp, 0);
33844 arg1 = CALL_EXPR_ARG (exp, 1);
33845 arg2 = CALL_EXPR_ARG (exp, 2);
33847 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33848 d.nelt = GET_MODE_NUNITS (d.vmode);
33849 d.testing_p = false;
33850 gcc_assert (VECTOR_MODE_P (d.vmode));
33852 if (TREE_CODE (arg2) != VECTOR_CST)
33854 error_at (EXPR_LOCATION (exp),
33855 "vector permutation requires vector constant");
33859 switch (extract_vec_perm_cst (&d, arg2))
33865 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33869 if (!operand_equal_p (arg0, arg1, 0))
33871 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33872 d.op0 = force_reg (d.vmode, d.op0);
33873 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33874 d.op1 = force_reg (d.vmode, d.op1);
33878 /* The elements of PERM do not suggest that only the first operand
33879 is used, but both operands are identical. Allow easier matching
33880 of the permutation by folding the permutation into the single input vector. */
33883 unsigned i, nelt = d.nelt;
33884 for (i = 0; i < nelt; ++i)
33885 if (d.perm[i] >= nelt)
33891 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33892 d.op0 = force_reg (d.vmode, d.op0);
33897 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33898 d.op0 = force_reg (d.vmode, d.op0);
33903 d.target = gen_reg_rtx (d.vmode);
33904 if (ix86_expand_vec_perm_builtin_1 (&d))
33907 /* For compiler-generated permutations, we should never get here, because
33908 the compiler should also be checking the ok hook. But since this is a
33909 builtin the user has access to, don't abort; issue a sorry instead. */
33913 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33916 sorry ("vector permutation (%d %d %d %d)",
33917 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33920 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33921 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33922 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33925 sorry ("vector permutation "
33926 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33927 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33928 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33929 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33930 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33933 gcc_unreachable ();
33936 return CONST0_RTX (d.vmode);
33939 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33942 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33944 struct expand_vec_perm_d d;
33948 d.vmode = TYPE_MODE (vec_type);
33949 d.nelt = GET_MODE_NUNITS (d.vmode);
33950 d.testing_p = true;
33952 /* Given sufficient ISA support we can just return true here
33953 for selected vector modes. */
33954 if (GET_MODE_SIZE (d.vmode) == 16)
33956 /* All implementable with a single vpperm insn. */
33959 /* All implementable with 2 pshufb + 1 ior. */
33962 /* All implementable with shufpd or unpck[lh]pd. */
33967 vec_mask = extract_vec_perm_cst (&d, mask);
33969 /* This hook cannot be called in response to something that the
33970 user does (unlike the builtin expander), so we should never see
33971 an error generated from the extract. */
33972 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33973 one_vec = (vec_mask != 3);
33975 /* Implementable with shufps or pshufd. */
33976 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33979 /* Otherwise we have to go through the motions and see if we can
33980 figure out how to generate the requested permutation. */
33981 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33982 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33984 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33987 ret = ix86_expand_vec_perm_builtin_1 (&d);
33994 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33996 struct expand_vec_perm_d d;
34002 d.vmode = GET_MODE (targ);
34003 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
34004 d.testing_p = false;
34006 for (i = 0; i < nelt; ++i)
34007 d.perm[i] = i * 2 + odd;
34009 /* We'll either be able to implement the permutation directly... */
34010 if (expand_vec_perm_1 (&d))
34013 /* ... or we use the special-case patterns. */
34014 expand_vec_perm_even_odd_1 (&d, odd);
34017 /* Expand an insert into a vector register through a pinsr insn.
34018 Return true if successful. */
34021 ix86_expand_pinsr (rtx *operands)
34023 rtx dst = operands[0];
34024 rtx src = operands[3];
34026 unsigned int size = INTVAL (operands[1]);
34027 unsigned int pos = INTVAL (operands[2]);
34029 if (GET_CODE (dst) == SUBREG)
34031 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
34032 dst = SUBREG_REG (dst);
34035 if (GET_CODE (src) == SUBREG)
34036 src = SUBREG_REG (src);
34038 switch (GET_MODE (dst))
34045 enum machine_mode srcmode, dstmode;
34046 rtx (*pinsr)(rtx, rtx, rtx, rtx);
34048 srcmode = mode_for_size (size, MODE_INT, 0);
34053 if (!TARGET_SSE4_1)
34055 dstmode = V16QImode;
34056 pinsr = gen_sse4_1_pinsrb;
34062 dstmode = V8HImode;
34063 pinsr = gen_sse2_pinsrw;
34067 if (!TARGET_SSE4_1)
34069 dstmode = V4SImode;
34070 pinsr = gen_sse4_1_pinsrd;
34074 gcc_assert (TARGET_64BIT);
34075 if (!TARGET_SSE4_1)
34077 dstmode = V2DImode;
34078 pinsr = gen_sse4_1_pinsrq;
34085 dst = gen_lowpart (dstmode, dst);
34086 src = gen_lowpart (srcmode, src);
34090 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
34099 /* Return the calling-ABI-specific va_list type node, i.e. the
34100 va_list type specific to FNDECL. */
34103 ix86_fn_abi_va_list (tree fndecl)
34106 return va_list_type_node;
34107 gcc_assert (fndecl != NULL_TREE);
34109 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34110 return ms_va_list_type_node;
34112 return sysv_va_list_type_node;
34115 /* Returns the canonical va_list type specified by TYPE. If there
34116 is no valid TYPE provided, it returns NULL_TREE. */
34119 ix86_canonical_va_list_type (tree type)
34123 /* Resolve references and pointers to va_list type. */
34124 if (TREE_CODE (type) == MEM_REF)
34125 type = TREE_TYPE (type);
34126 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
34127 type = TREE_TYPE (type);
34128 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34129 type = TREE_TYPE (type);
34131 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34133 wtype = va_list_type_node;
34134 gcc_assert (wtype != NULL_TREE);
34136 if (TREE_CODE (wtype) == ARRAY_TYPE)
34138 /* If va_list is an array type, the argument may have decayed
34139 to a pointer type, e.g. by being passed to another function.
34140 In that case, unwrap both types so that we can compare the
34141 underlying records. */
34142 if (TREE_CODE (htype) == ARRAY_TYPE
34143 || POINTER_TYPE_P (htype))
34145 wtype = TREE_TYPE (wtype);
34146 htype = TREE_TYPE (htype);
34149 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34150 return va_list_type_node;
34151 wtype = sysv_va_list_type_node;
34152 gcc_assert (wtype != NULL_TREE);
34154 if (TREE_CODE (wtype) == ARRAY_TYPE)
34156 /* If va_list is an array type, the argument may have decayed
34157 to a pointer type, e.g. by being passed to another function.
34158 In that case, unwrap both types so that we can compare the
34159 underlying records. */
34160 if (TREE_CODE (htype) == ARRAY_TYPE
34161 || POINTER_TYPE_P (htype))
34163 wtype = TREE_TYPE (wtype);
34164 htype = TREE_TYPE (htype);
34167 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34168 return sysv_va_list_type_node;
34169 wtype = ms_va_list_type_node;
34170 gcc_assert (wtype != NULL_TREE);
34172 if (TREE_CODE (wtype) == ARRAY_TYPE)
34174 /* If va_list is an array type, the argument may have decayed
34175 to a pointer type, e.g. by being passed to another function.
34176 In that case, unwrap both types so that we can compare the
34177 underlying records. */
34178 if (TREE_CODE (htype) == ARRAY_TYPE
34179 || POINTER_TYPE_P (htype))
34181 wtype = TREE_TYPE (wtype);
34182 htype = TREE_TYPE (htype);
34185 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34186 return ms_va_list_type_node;
34189 return std_canonical_va_list_type (type);
34192 /* Iterate through the target-specific builtin types for va_list.
34193 IDX denotes the iterator, *PTREE is set to the result type of
34194 the va_list builtin, and *PNAME to its internal type.
34195 Returns zero if there is no element for this index, otherwise
34196 IDX should be increased upon the next call.
34197 Note, do not iterate a base builtin's name like __builtin_va_list.
34198 Used from c_common_nodes_and_builtins. */
34201 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34211 *ptree = ms_va_list_type_node;
34212 *pname = "__builtin_ms_va_list";
34216 *ptree = sysv_va_list_type_node;
34217 *pname = "__builtin_sysv_va_list";
34225 #undef TARGET_SCHED_DISPATCH
34226 #define TARGET_SCHED_DISPATCH has_dispatch
34227 #undef TARGET_SCHED_DISPATCH_DO
34228 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34230 /* The size of the dispatch window is the total number of bytes of
34231 object code allowed in a window. */
34232 #define DISPATCH_WINDOW_SIZE 16
34234 /* Number of dispatch windows considered for scheduling. */
34235 #define MAX_DISPATCH_WINDOWS 3
34237 /* Maximum number of instructions in a window. */
34240 /* Maximum number of immediate operands in a window. */
34243 /* Maximum number of immediate bits allowed in a window. */
34244 #define MAX_IMM_SIZE 128
34246 /* Maximum number of 32 bit immediates allowed in a window. */
34247 #define MAX_IMM_32 4
34249 /* Maximum number of 64 bit immediates allowed in a window. */
34250 #define MAX_IMM_64 2
34252 /* Maximum total of loads or prefetches allowed in a window. */
34255 /* Maximum total of stores allowed in a window. */
34256 #define MAX_STORE 1
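/* Note: the immediate limits are mutually consistent, since
   4 * 32 == 2 * 64 == 128 == MAX_IMM_SIZE bits per window; e.g. one
   64-bit immediate leaves room for at most two more 32-bit immediates
   (see the checks in count_num_restricted below).  */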
34262 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
34263 enum dispatch_group {
34278 /* Number of allowable groups in a dispatch window. It is an array
34279 indexed by the dispatch_group enum. 100 is used as a big number,
34280 because the number of these kinds of operations does not have any
34281 effect in a dispatch window, but we need them for other reasons in
34283 static unsigned int num_allowable_groups[disp_last] = {
34284 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34287 char group_name[disp_last + 1][16] = {
34288 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34289 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34290 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34293 /* Instruction path. */
34296 path_single, /* Single micro op. */
34297 path_double, /* Double micro op. */
34298 path_multi, /* Instructions with more than 2 micro ops. */
34302 /* sched_insn_info defines a window over the instructions scheduled in
34303 the basic block. It contains a pointer to the insn_info table and
34304 the instruction scheduled.
34306 Windows are allocated for each basic block and are linked
34308 typedef struct sched_insn_info_s {
34310 enum dispatch_group group;
34311 enum insn_path path;
34316 /* Linked list of dispatch windows. This is a two way list of
34317 dispatch windows of a basic block. It contains information about
34318 the number of uops in the window and the total number of
34319 instructions and of bytes in the object code for this dispatch
34321 typedef struct dispatch_windows_s {
34322 int num_insn; /* Number of insns in the window. */
34323 int num_uops; /* Number of uops in the window. */
34324 int window_size; /* Number of bytes in the window. */
34325 int window_num; /* Window number, either 0 or 1. */
34326 int num_imm; /* Number of immediates in an insn. */
34327 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34328 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34329 int imm_size; /* Total immediates in the window. */
34330 int num_loads; /* Total memory loads in the window. */
34331 int num_stores; /* Total memory stores in the window. */
34332 int violation; /* Violation exists in window. */
34333 sched_insn_info *window; /* Pointer to the window. */
34334 struct dispatch_windows_s *next;
34335 struct dispatch_windows_s *prev;
34336 } dispatch_windows;
34338 /* Immediate values used in an insn. */
34339 typedef struct imm_info_s
34346 static dispatch_windows *dispatch_window_list;
34347 static dispatch_windows *dispatch_window_list1;
34349 /* Get dispatch group of insn. */
34351 static enum dispatch_group
34352 get_mem_group (rtx insn)
34354 enum attr_memory memory;
34356 if (INSN_CODE (insn) < 0)
34357 return disp_no_group;
34358 memory = get_attr_memory (insn);
34359 if (memory == MEMORY_STORE)
34362 if (memory == MEMORY_LOAD)
34365 if (memory == MEMORY_BOTH)
34366 return disp_load_store;
34368 return disp_no_group;
34371 /* Return true if insn is a compare instruction. */
34376 enum attr_type type;
34378 type = get_attr_type (insn);
34379 return (type == TYPE_TEST
34380 || type == TYPE_ICMP
34381 || type == TYPE_FCMP
34382 || GET_CODE (PATTERN (insn)) == COMPARE);
34385 /* Return true if a dispatch violation was encountered. */
34388 dispatch_violation (void)
34390 if (dispatch_window_list->next)
34391 return dispatch_window_list->next->violation;
34392 return dispatch_window_list->violation;
34395 /* Return true if insn is a branch instruction. */
34398 is_branch (rtx insn)
34400 return (CALL_P (insn) || JUMP_P (insn));
34403 /* Return true if insn is a prefetch instruction. */
34406 is_prefetch (rtx insn)
34408 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34411 /* This function initializes a dispatch window and the list container holding a
34412 pointer to the window. */
34415 init_window (int window_num)
34418 dispatch_windows *new_list;
34420 if (window_num == 0)
34421 new_list = dispatch_window_list;
34423 new_list = dispatch_window_list1;
34425 new_list->num_insn = 0;
34426 new_list->num_uops = 0;
34427 new_list->window_size = 0;
34428 new_list->next = NULL;
34429 new_list->prev = NULL;
34430 new_list->window_num = window_num;
34431 new_list->num_imm = 0;
34432 new_list->num_imm_32 = 0;
34433 new_list->num_imm_64 = 0;
34434 new_list->imm_size = 0;
34435 new_list->num_loads = 0;
34436 new_list->num_stores = 0;
34437 new_list->violation = false;
34439 for (i = 0; i < MAX_INSN; i++)
34441 new_list->window[i].insn = NULL;
34442 new_list->window[i].group = disp_no_group;
34443 new_list->window[i].path = no_path;
34444 new_list->window[i].byte_len = 0;
34445 new_list->window[i].imm_bytes = 0;
34450 /* This function allocates and initializes a dispatch window and the
34451 list container holding a pointer to the window. */
34453 static dispatch_windows *
34454 allocate_window (void)
34456 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34457 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34462 /* This routine initializes the dispatch scheduling information. It
34463 initiates building dispatch scheduler tables and constructs the
34464 first dispatch window. */
34467 init_dispatch_sched (void)
34469 /* Allocate a dispatch list and a window. */
34470 dispatch_window_list = allocate_window ();
34471 dispatch_window_list1 = allocate_window ();
34476 /* This function returns true if a branch is detected. End of a basic block
34477 does not have to be a branch, but here we assume only branches end a
34481 is_end_basic_block (enum dispatch_group group)
34483 return group == disp_branch;
34486 /* This function is called when the end of a window processing is reached. */
34489 process_end_window (void)
34491 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34492 if (dispatch_window_list->next)
34494 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34495 gcc_assert (dispatch_window_list->window_size
34496 + dispatch_window_list1->window_size <= 48);
34502 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34503 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34504 for 48 bytes of instructions. Note that these windows are not dispatch
34505 windows whose sizes are DISPATCH_WINDOW_SIZE. */
34507 static dispatch_windows *
34508 allocate_next_window (int window_num)
34510 if (window_num == 0)
34512 if (dispatch_window_list->next)
34515 return dispatch_window_list;
34518 dispatch_window_list->next = dispatch_window_list1;
34519 dispatch_window_list1->prev = dispatch_window_list;
34521 return dispatch_window_list1;
34524 /* Increment the number of immediate operands of an instruction. */
34527 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34532 switch (GET_CODE (*in_rtx))
34537 (imm_values->imm)++;
34538 if (x86_64_immediate_operand (*in_rtx, SImode))
34539 (imm_values->imm32)++;
34541 (imm_values->imm64)++;
34545 (imm_values->imm)++;
34546 (imm_values->imm64)++;
34550 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34552 (imm_values->imm)++;
34553 (imm_values->imm32)++;
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
		(rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
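/* Worked example (illustrative, not from the original source): an insn
   carrying two 32-bit immediates and one 64-bit immediate would set
   *IMM = 3, *IMM32 = 2 and *IMM64 = 1, and the function would return
   2 * 4 + 1 * 8 = 16 bytes of immediate data.  */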
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
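/* Worked example (illustrative, not from the original source): a
   disp_imm insn carrying one 64-bit immediate is counted as BIG once
   the window already holds MAX_IMM_64 64-bit immediates, or once
   num_imm64_operand * 2 added to the window's 32-bit immediate count
   would exceed MAX_IMM_32 -- i.e. each 64-bit immediate also consumes
   the budget of two 32-bit immediate slots.  */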
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uops.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}
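/* Illustrative arithmetic for the window-1 fullness test above (not
   from the original source): with 20 bytes already in window 0 and 12
   bytes in window 1, sum == 32 and window 1 is treated as full;
   likewise a 5-byte insn on top of sum == 43 reaches the 48-byte
   budget and forces the scheduler to start a new window pair.  */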
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed the allowable maximum, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
    }
}
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
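/* The DEBUG_FUNCTION helpers here are meant to be invoked by hand from
   a debugger rather than from compiler code; e.g. (a hypothetical gdb
   session):

     (gdb) call debug_dispatch_window (0)

   prints the contents of dispatch window number 0 to stdout.  */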
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return V16QImode;
    case HImode:
      return V8HImode;
    case SImode:
      return V4SImode;
    case DImode:
      return V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
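/* Note (illustrative, not from the original source): the hook's return
   value is a bitmask of vector sizes in bytes, so 32 | 16 advertises
   both 32-byte (256-bit AVX) and 16-byte (128-bit SSE) vectors to the
   vectorizer, while 0 makes it use only the mode returned by
   ix86_preferred_simd_mode.  */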
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"