/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void ia64_free_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
                                                 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
                                                  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
                                                int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static void ia64_aix_select_section PARAMS ((tree, int,
                                             unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_aix_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { NULL,              0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

struct gcc_target targetm = TARGET_INITIALIZER;
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}
/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}
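/* For illustration only (this comment is editorial, not from the original
   source): a symbol that passes this predicate can be addressed with a
   single gp-relative add instead of a GOT load, e.g.

        addl r14 = @gprel(small_var), gp        // small_var in .sdata

   whereas a symbol outside the size threshold needs the usual
   "addl r14 = @ltoff(var), gp ;; ld8 r14 = [r14]" GOT sequence.  */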
/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
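/* Worked example of the low-14-bit test above (editorial illustration):
   an offset of 0x8000 has its low 14 bits clear (0x8000 & 0x3fff == 0),
   so "sym + 0x8000" may share the GOT entry for "sym" and fold the
   offset into a later add; an offset such as 0x2345 fails the test and
   would otherwise demand a GOT entry of its own.  */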
/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}
/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}
/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}
/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need an immediate that is valid under both the GT and LT forms.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}
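/* Editorial illustration, assuming the usual IA-64 constraint letters:
   K accepts 8-bit immediates in -128..127 and L accepts "adjusted"
   immediates in -127..128, so this predicate admits -127..127.  The
   value 127 satisfies both forms, while 128 fails K and -128 fails L.  */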
/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}
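/* For illustration (editorial): these are the multipliers expressible by
   the shladd instruction, which shifts by log2 of the operand.  So
   x * 4 + y can be emitted as

        shladd r14 = r32, 2, r33        // r14 = (r32 << 2) + r33
   */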
/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4  || INTVAL (op) == -1 ||
              INTVAL (op) == 1   || INTVAL (op) == 4  ||
              INTVAL (op) == 8   || INTVAL (op) == 16));
}
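/* For illustration (editorial): the IA-64 fetchadd instruction can encode
   only these eight increments, e.g.

        fetchadd8.acq r14 = [r32], 8

   atomically returns the old value and adds 8; any other increment has
   to be synthesized with a compare-and-exchange loop instead.  */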
/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}
/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == PLUS || code == MINUS || code == AND
              || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_PFS_REGNUM);
}
/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Like fr_reg_or_fp01_operand, but don't allow any SUBREGs.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     here.  */

  return (register_operand (op, mode) &&
          REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, one that is either 0, 0.0, or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
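/* Worked example (editorial illustration): for rop == 0x1f8 and
   rshift == 3, op >> shift yields 0x3f, a solid block of six 1 bits, and
   exact_log2 (0x3f + 1) returns a field length of 6.  A mask such as
   0x1e8 >> 3 == 0x3d is not a solid block, so exact_log2 (0x3e) returns
   -1 and the dep.z pattern is rejected.  */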
/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
           && GET_CODE (XEXP (src, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
           && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
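      /* Worked example (editorial illustration): for ofs == 0x2345, the
         low part is ((0x2345 & 0x3fff) ^ 0x2000) - 0x2000 == -0x1cbb and
         the high part is 0x2345 - (-0x1cbb) == 0x4000, so we load
         "sym + 0x4000" and then add the sign-extended 14-bit -0x1cbb to
         reconstruct the full address.  */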
      if (! scratch)
        scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
                                  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
        scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}
rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
         pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
         slot.  Unfortunately, the stack slot address gets cse'd across
         the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
         place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
         in place, since this rtx is used in exception handling receivers.
         Moreover, we must get this rtx out of regno_reg_rtx or reload
         will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
        {
          REGNO (save) = GR_REG (4);
          regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
        }
    }
  else
    {
      if (setjmp_p)
        save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
        save = gen_rtx_REG (DImode, LOC_REG (0));
      else
        save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}
/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
        rtx base = XEXP (in, 0);

        switch (GET_CODE (base))
          {
          case REG:
            out[0] = adjust_address (in, DImode, 0);
            break;
          case POST_MODIFY:
            base = XEXP (base, 0);
            out[0] = adjust_address (in, DImode, 0);
            break;

          /* Since we're changing the mode, we need to change to POST_MODIFY
             as well to preserve the size of the increment.  Either that or
             do the update in two steps, but we've already got this scratch
             register handy so let's use it.  */
          case POST_INC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, 16)));
            break;
          case POST_DEC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, -16)));
            break;
          default:
            abort ();
          }

        if (scratch == NULL_RTX)
          abort ();
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      else
        abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
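/* For illustration (editorial): a BImode compare result lives in a
   predicate register, so expanding "r14 < r15" produces a single

        cmp.lt p6, p7 = r14, r15

   and the (ne (reg:BI ...) (const_int 0)) returned here is what the
   branch and conditional-move patterns then test.  */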
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, pfs, gp_save, narg_rtx, dest;
  bool indirect_p;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));
  pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
      else if (! retval)
        insn = gen_call_nopic (addr, narg_rtx, b0);
      else
        insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  indirect_p = ! symbolic_operand (addr, VOIDmode);

  if (sibcall_p || (TARGET_CONST_GP && !indirect_p))
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  if (gp_save)
    emit_move_insn (gp_save, pic_offset_table_rtx);

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (indirect_p)
    {
      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
                      gen_rtx_MEM (DImode, plus_constant (addr, 8)));
    }
  else
    dest = addr;

  if (sibcall_p)
    insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
  else if (! retval)
    insn = gen_call_pic (dest, narg_rtx, b0);
  else
    insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
  emit_call_insn (insn);

  if (gp_save)
    emit_move_insn (pic_offset_table_rtx, gp_save);
}
/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", f);
          out_state = 1;
        }
      else
        fputc (',', f);
      if (re == rs + 1)
        fprintf (f, "p%u", rs);
      else
        fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
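/* For illustration (editorial): on a typical target this emits a
   directive such as

        .pred.safe_across_calls p1-p5,p16-p63

   telling the assembler which predicate registers are preserved across
   calls, so it need not flag cross-call predicate uses as hazards.  */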
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int reg_fp;                   /* register for fp.  */
  int reg_save_b0;              /* save register for b0.  */
  int reg_save_pr;              /* save register for prs.  */
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
  int reg_save_ar_unat;         /* save register for ar.unat.  */
  int reg_save_ar_lc;           /* save register for ar.lc.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */
  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
        {
          current_frame_info.reg_fp = LOC_REG (79);
          current_frame_info.n_local_regs++;
        }
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
        {
          spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          spill_size += 8;
          n_spilled += 1;
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = -current_frame_info.total_size;
          else
            offset = -(current_frame_info.total_size
                       - current_function_outgoing_args_size - 16);
        }
      else if (to == STACK_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = 0;
          else
            offset = 16 + current_function_outgoing_args_size;
        }
      else
        abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
        offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
        offset = (current_frame_info.total_size
                  + 16 - current_function_pretend_args_size);
      else
        abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;               /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  rtx prev_insn[2];             /* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
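/* For illustration (editorial): with two iterators, a run of 64-bit
   spills becomes two independent post-modify chains, roughly

        st8 [r2] = r16, 16      // iterator 0: offsets 0, 16, 32, ...
        st8 [r3] = r17, 16      // iterator 1: offsets 8, 24, 40, ...

   so each insn group can issue two memory operations instead of
   serializing every store on a single base register.  */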
static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}
static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
        {
          *spill_fill_data.prev_addr[iter]
            = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                   gen_rtx_PLUS (DImode,
                                                 spill_fill_data.iter_reg[iter],
                                                 disp_rtx));
          REG_NOTES (spill_fill_data.prev_insn[iter])
            = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
                                 REG_NOTES (spill_fill_data.prev_insn[iter]));
        }
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq, insn;

      if (disp == 0)
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
                         spill_fill_data.init_reg[iter]);
      else
        {
          start_sequence ();

          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }

          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.init_reg[iter],
                                 disp_rtx));

          seq = gen_sequence ();
          end_sequence ();
        }

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
        {
          rtx first = get_insns ();
          if (first)
            insn = emit_insn_before (seq, first);
          else
            insn = emit_insn (seq);
        }
      spill_fill_data.init_after = insn;

      /* If DISP is 0, we may or may not have a further adjustment
         afterward.  If we do, then the load/store insn may be modified
         to be a post-modify.  If we don't, then this copy may be
         eliminated by copyprop_hardreg_forward, which makes this
         insn garbage, which runs afoul of the sanity check in
         propagate_one_insn.  So mark this insn as legal to delete.  */
      if (disp == 0)
        REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
                                             REG_NOTES (insn));
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        {
          base = hard_frame_pointer_rtx;
          off = - cfa_off;
        }
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      REG_NOTES (insn)
        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                             gen_rtx_SET (VOIDmode,
                                          gen_rtx_MEM (GET_MODE (reg),
                                                       plus_constant (base, off)),
                                          frame_reg),
                             REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                                GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
        [ varargs spill area ]
        [ fr register spill area ]
        [ br register spill area ]
        [ ar register spill area ]
        [ pr register spill area ]
        [ gr register spill area ] */
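/* For illustration only (editorial, with made-up numbers): a function
   that spills two FRs, b0, ar.pfs, the predicates, and three GRs to
   memory would place, working down from cfa+16: 32 bytes of fr spills
   (FRs are 16 bytes each), 8 bytes for b0, 8 for ar.pfs, 8 for the
   prs, then 24 bytes of gr spills.  ia64_compute_frame_size derives
   the real sizes and offsets.  */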
/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */
1974 ia64_expand_prologue ()
1976 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1977 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1980 ia64_compute_frame_size (get_frame_size ());
1981 last_scratch_gr_reg = 15;
1983 /* If there is no epilogue, then we don't need some prologue insns.
1984 We need to avoid emitting the dead prologue insns, because flow
1985 will complain about them. */
1990 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1991 if ((e->flags & EDGE_FAKE) == 0
1992 && (e->flags & EDGE_FALLTHRU) != 0)
1994 epilogue_p = (e != NULL);
1999 /* Set the local, input, and output register names. We need to do this
2000 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2001 half. If we use in/loc/out register names, then we get assembler errors
2002 in crtn.S because there is no alloc insn or regstk directive in there. */
2003 if (! TARGET_REG_NAMES)
2005 int inputs = current_frame_info.n_input_regs;
2006 int locals = current_frame_info.n_local_regs;
2007 int outputs = current_frame_info.n_output_regs;
2009 for (i = 0; i < inputs; i++)
2010 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2011 for (i = 0; i < locals; i++)
2012 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2013 for (i = 0; i < outputs; i++)
2014 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2017 /* Set the frame pointer register name. The regnum is logically loc79,
2018 but of course we'll not have allocated that many locals. Rather than
2019 worrying about renumbering the existing rtxs, we adjust the name. */
2020 /* ??? This code means that we can never use one local register when
2021 there is a frame pointer. loc79 gets wasted in this case, as it is
2022 renamed to a register that will never be used. See also the try_locals
2023 code in find_gr_spill. */
2024 if (current_frame_info.reg_fp)
2026 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2027 reg_names[HARD_FRAME_POINTER_REGNUM]
2028 = reg_names[current_frame_info.reg_fp];
2029 reg_names[current_frame_info.reg_fp] = tmp;
2032 /* Fix up the return address placeholder. */
2033 /* ??? We can fail if __builtin_return_address is used, and we didn't
2034 allocate a register in which to save b0. I can't think of a way to
2035 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2036 then be sure that I got the right one. Further, reload doesn't seem
2037 to care if an eliminable register isn't used, and "eliminates" it
2039 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2040 && current_frame_info.reg_save_b0 != 0)
2041 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2043 /* We don't need an alloc instruction if we've used no outputs or locals. */
2044 if (current_frame_info.n_local_regs == 0
2045 && current_frame_info.n_output_regs == 0
2046 && current_frame_info.n_input_regs <= current_function_args_info.int_regs)
2048 /* If there is no alloc, but there are input registers used, then we
2049 need a .regstk directive. */
2050 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2051 ar_pfs_save_reg = NULL_RTX;
2055 current_frame_info.need_regstk = 0;
2057 if (current_frame_info.reg_save_ar_pfs)
2058 regno = current_frame_info.reg_save_ar_pfs;
2060 regno = next_scratch_gr_reg ();
2061 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2063 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2064 GEN_INT (current_frame_info.n_input_regs),
2065 GEN_INT (current_frame_info.n_local_regs),
2066 GEN_INT (current_frame_info.n_output_regs),
2067 GEN_INT (current_frame_info.n_rotate_regs)));
2068 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2071 /* Set up frame pointer, stack pointer, and spill iterators. */
2073 n_varargs = cfun->machine->n_varargs;
2074 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2075 stack_pointer_rtx, 0);
2077 if (frame_pointer_needed)
2079 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2080 RTX_FRAME_RELATED_P (insn) = 1;
2083 if (current_frame_info.total_size != 0)
2085 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2088 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2089 offset = frame_size_rtx;
2092 regno = next_scratch_gr_reg ();
2093 offset = gen_rtx_REG (DImode, regno);
2094 emit_move_insn (offset, frame_size_rtx);
2097 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2098 stack_pointer_rtx, offset));
2100 if (! frame_pointer_needed)
2102 RTX_FRAME_RELATED_P (insn) = 1;
2103 if (GET_CODE (offset) != CONST_INT)
2106 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2107 gen_rtx_SET (VOIDmode,
2109 gen_rtx_PLUS (DImode,
2116 /* ??? At this point we must generate a magic insn that appears to
2117 modify the stack pointer, the frame pointer, and all spill
2118 iterators. This would allow the most scheduling freedom. For
2119 now, just hard stop. */
2120 emit_insn (gen_blockage ());
2123 /* Must copy out ar.unat before doing any integer spills. */
2124 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2126 if (current_frame_info.reg_save_ar_unat)
2128 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2131 alt_regno = next_scratch_gr_reg ();
2132 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2133 current_frame_info.gr_used_mask |= 1 << alt_regno;
2136 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2137 insn = emit_move_insn (ar_unat_save_reg, reg);
2138 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2140 /* Even if we're not going to generate an epilogue, we still
2141 need to save the register so that EH works. */
2142 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2143 emit_insn (gen_prologue_use (ar_unat_save_reg));
2146 ar_unat_save_reg = NULL_RTX;
2148 /* Spill all varargs registers. Do this before spilling any GR registers,
2149 since we want the UNAT bits for the GR registers to override the UNAT
2150 bits from varargs, which we don't care about. */
2153 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2155 reg = gen_rtx_REG (DImode, regno);
2156 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2159 /* Locate the bottom of the register save area. */
2160 cfa_off = (current_frame_info.spill_cfa_off
2161 + current_frame_info.spill_size
2162 + current_frame_info.extra_spill_size);
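/* Worked example with hypothetical sizes: if spill_cfa_off == -16,
   spill_size == 32 and extra_spill_size == 16, cfa_off starts at 32.
   Each GR/BR-sized spill below moves it down by 8 (TFmode FR spills
   by 16), so it reaches spill_cfa_off + spill_size (16) once the
   PR/AR/BR saves are done, and spill_cfa_off (-16) after the gr/br/fr
   area -- exactly what the consistency checks further down expect.  */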
2164 /* Save the predicate register block either in a register or in memory. */
2165 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2167 reg = gen_rtx_REG (DImode, PR_REG (0));
2168 if (current_frame_info.reg_save_pr != 0)
2170 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2171 insn = emit_move_insn (alt_reg, reg);
2173 /* ??? Denote pr spill/fill by a DImode move that modifies all
2174 64 hard registers. */
2175 RTX_FRAME_RELATED_P (insn) = 1;
2177 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2178 gen_rtx_SET (VOIDmode, alt_reg, reg),
2181 /* Even if we're not going to generate an epilogue, we still
2182 need to save the register so that EH works. */
2184 emit_insn (gen_prologue_use (alt_reg));
2188 alt_regno = next_scratch_gr_reg ();
2189 alt_reg = gen_rtx_REG (DImode, alt_regno);
2190 insn = emit_move_insn (alt_reg, reg);
2191 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2196 /* Handle AR regs in numerical order. All of them get special handling. */
2197 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2198 && current_frame_info.reg_save_ar_unat == 0)
2200 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2201 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2205 /* The alloc insn already copied ar.pfs into a general register. The
2206 only thing we have to do now is copy that register to a stack slot
2207 if we'd not allocated a local register for the job. */
2208 if (current_frame_info.reg_save_ar_pfs == 0
2209 && ! current_function_is_leaf)
2211 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2212 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2216 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2218 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2219 if (current_frame_info.reg_save_ar_lc != 0)
2221 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2222 insn = emit_move_insn (alt_reg, reg);
2223 RTX_FRAME_RELATED_P (insn) = 1;
2225 /* Even if we're not going to generate an epilogue, we still
2226 need to save the register so that EH works. */
2228 emit_insn (gen_prologue_use (alt_reg));
2232 alt_regno = next_scratch_gr_reg ();
2233 alt_reg = gen_rtx_REG (DImode, alt_regno);
2234 emit_move_insn (alt_reg, reg);
2235 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2240 /* We should now be at the base of the gr/br/fr spill area. */
2241 if (cfa_off != (current_frame_info.spill_cfa_off
2242 + current_frame_info.spill_size))
2245 /* Spill all general registers. */
2246 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2247 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2249 reg = gen_rtx_REG (DImode, regno);
2250 do_spill (gen_gr_spill, reg, cfa_off, reg);
2254 /* Handle BR0 specially -- it may be getting stored permanently in
2255 some GR register. */
2256 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2258 reg = gen_rtx_REG (DImode, BR_REG (0));
2259 if (current_frame_info.reg_save_b0 != 0)
2261 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2262 insn = emit_move_insn (alt_reg, reg);
2263 RTX_FRAME_RELATED_P (insn) = 1;
2265 /* Even if we're not going to generate an epilogue, we still
2266 need to save the register so that EH works. */
2268 emit_insn (gen_prologue_use (alt_reg));
2272 alt_regno = next_scratch_gr_reg ();
2273 alt_reg = gen_rtx_REG (DImode, alt_regno);
2274 emit_move_insn (alt_reg, reg);
2275 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2280 /* Spill the rest of the BR registers. */
2281 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2282 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2284 alt_regno = next_scratch_gr_reg ();
2285 alt_reg = gen_rtx_REG (DImode, alt_regno);
2286 reg = gen_rtx_REG (DImode, regno);
2287 emit_move_insn (alt_reg, reg);
2288 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2292 /* Align the frame and spill all FR registers. */
2293 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2294 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2298 reg = gen_rtx_REG (TFmode, regno);
2299 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2303 if (cfa_off != current_frame_info.spill_cfa_off)
2306 finish_spill_pointers ();
2309 /* Called after register allocation to add any instructions needed for the
2310 epilogue. Using an epilogue insn is favored compared to putting all of the
2311 instructions in output_function_epilogue(), since it allows the scheduler
2312 to intermix instructions with the saves of the caller saved registers. In
2313 some cases, it might be necessary to emit a barrier instruction as the last
2314 insn to prevent such scheduling. */
2317 ia64_expand_epilogue (sibcall_p)
2320 rtx insn, reg, alt_reg, ar_unat_save_reg;
2321 int regno, alt_regno, cfa_off;
2323 ia64_compute_frame_size (get_frame_size ());
2325 /* If there is a frame pointer, then we use it instead of the stack
2326 pointer, so that the stack pointer does not need to be valid when
2327 the epilogue starts. See EXIT_IGNORE_STACK. */
2328 if (frame_pointer_needed)
2329 setup_spill_pointers (current_frame_info.n_spilled,
2330 hard_frame_pointer_rtx, 0);
2332 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2333 current_frame_info.total_size);
2335 if (current_frame_info.total_size != 0)
2337 /* ??? At this point we must generate a magic insn that appears to
2338 modify the spill iterators and the frame pointer. This would
2339 allow the most scheduling freedom. For now, just hard stop. */
2340 emit_insn (gen_blockage ());
2343 /* Locate the bottom of the register save area. */
2344 cfa_off = (current_frame_info.spill_cfa_off
2345 + current_frame_info.spill_size
2346 + current_frame_info.extra_spill_size);
2348 /* Restore the predicate registers. */
2349 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2351 if (current_frame_info.reg_save_pr != 0)
2352 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2355 alt_regno = next_scratch_gr_reg ();
2356 alt_reg = gen_rtx_REG (DImode, alt_regno);
2357 do_restore (gen_movdi_x, alt_reg, cfa_off);
2360 reg = gen_rtx_REG (DImode, PR_REG (0));
2361 emit_move_insn (reg, alt_reg);
2364 /* Restore the application registers. */
2366 /* Load the saved unat from the stack, but do not restore it until
2367 after the GRs have been restored. */
2368 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2370 if (current_frame_info.reg_save_ar_unat != 0)
2372 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2375 alt_regno = next_scratch_gr_reg ();
2376 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2377 current_frame_info.gr_used_mask |= 1 << alt_regno;
2378 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2383 ar_unat_save_reg = NULL_RTX;
2385 if (current_frame_info.reg_save_ar_pfs != 0)
2387 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2388 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2389 emit_move_insn (reg, alt_reg);
2391 else if (! current_function_is_leaf)
2393 alt_regno = next_scratch_gr_reg ();
2394 alt_reg = gen_rtx_REG (DImode, alt_regno);
2395 do_restore (gen_movdi_x, alt_reg, cfa_off);
2397 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2398 emit_move_insn (reg, alt_reg);
2401 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2403 if (current_frame_info.reg_save_ar_lc != 0)
2404 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2407 alt_regno = next_scratch_gr_reg ();
2408 alt_reg = gen_rtx_REG (DImode, alt_regno);
2409 do_restore (gen_movdi_x, alt_reg, cfa_off);
2412 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2413 emit_move_insn (reg, alt_reg);
2416 /* We should now be at the base of the gr/br/fr spill area. */
2417 if (cfa_off != (current_frame_info.spill_cfa_off
2418 + current_frame_info.spill_size))
2421 /* Restore all general registers. */
2422 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2423 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2425 reg = gen_rtx_REG (DImode, regno);
2426 do_restore (gen_gr_restore, reg, cfa_off);
2430 /* Restore the branch registers. Handle B0 specially, as it may
2431 have gotten stored in some GR register. */
2432 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2434 if (current_frame_info.reg_save_b0 != 0)
2435 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2438 alt_regno = next_scratch_gr_reg ();
2439 alt_reg = gen_rtx_REG (DImode, alt_regno);
2440 do_restore (gen_movdi_x, alt_reg, cfa_off);
2443 reg = gen_rtx_REG (DImode, BR_REG (0));
2444 emit_move_insn (reg, alt_reg);
2447 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2448 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2450 alt_regno = next_scratch_gr_reg ();
2451 alt_reg = gen_rtx_REG (DImode, alt_regno);
2452 do_restore (gen_movdi_x, alt_reg, cfa_off);
2454 reg = gen_rtx_REG (DImode, regno);
2455 emit_move_insn (reg, alt_reg);
2458 /* Restore floating point registers. */
2459 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2460 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2464 reg = gen_rtx_REG (TFmode, regno);
2465 do_restore (gen_fr_restore_x, reg, cfa_off);
2469 /* Restore ar.unat for real. */
2470 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2472 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2473 emit_move_insn (reg, ar_unat_save_reg);
2476 if (cfa_off != current_frame_info.spill_cfa_off)
2479 finish_spill_pointers ();
2481 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2483 /* ??? At this point we must generate a magic insn that appears to
2484 modify the spill iterators, the stack pointer, and the frame
2485 pointer. This would allow the most scheduling freedom. For now,
2487 emit_insn (gen_blockage ());
2490 if (cfun->machine->ia64_eh_epilogue_sp)
2491 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2492 else if (frame_pointer_needed)
2494 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2495 RTX_FRAME_RELATED_P (insn) = 1;
2497 else if (current_frame_info.total_size)
2499 rtx offset, frame_size_rtx;
2501 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2502 if (CONST_OK_FOR_I (current_frame_info.total_size))
2503 offset = frame_size_rtx;
2506 regno = next_scratch_gr_reg ();
2507 offset = gen_rtx_REG (DImode, regno);
2508 emit_move_insn (offset, frame_size_rtx);
2511 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2514 RTX_FRAME_RELATED_P (insn) = 1;
2515 if (GET_CODE (offset) != CONST_INT)
2518 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2519 gen_rtx_SET (VOIDmode,
2521 gen_rtx_PLUS (DImode,
2528 if (cfun->machine->ia64_eh_epilogue_bsp)
2529 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2532 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2535 int fp = GR_REG (2);
2536 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
2537 first available call-clobbered register. If there was a frame pointer
2538 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2539 so we have to make sure we're using the string "r2" when emitting
2540 the register name for the assembler. */
2541 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2542 fp = HARD_FRAME_POINTER_REGNUM;
2544 /* We must emit an alloc to force the input registers to become output
2545 registers. Otherwise, if the callee tries to pass its parameters
2546 through to another call without an intervening alloc, then these
2547 values get lost. */
2548 /* ??? We don't need to preserve all input registers. We only need to
2549 preserve those input registers used as arguments to the sibling call.
2550 It is unclear how to compute that number here. */
2551 if (current_frame_info.n_input_regs != 0)
2552 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2553 GEN_INT (0), GEN_INT (0),
2554 GEN_INT (current_frame_info.n_input_regs),
2559 /* Return 1 if br.ret can do all the work required to return from a
2560 function. */
2563 ia64_direct_return ()
2565 if (reload_completed && ! frame_pointer_needed)
2567 ia64_compute_frame_size (get_frame_size ());
2569 return (current_frame_info.total_size == 0
2570 && current_frame_info.n_spilled == 0
2571 && current_frame_info.reg_save_b0 == 0
2572 && current_frame_info.reg_save_pr == 0
2573 && current_frame_info.reg_save_ar_pfs == 0
2574 && current_frame_info.reg_save_ar_unat == 0
2575 && current_frame_info.reg_save_ar_lc == 0);
2581 ia64_hard_regno_rename_ok (from, to)
2585 /* Don't clobber any of the registers we reserved for the prologue. */
2586 if (to == current_frame_info.reg_fp
2587 || to == current_frame_info.reg_save_b0
2588 || to == current_frame_info.reg_save_pr
2589 || to == current_frame_info.reg_save_ar_pfs
2590 || to == current_frame_info.reg_save_ar_unat
2591 || to == current_frame_info.reg_save_ar_lc)
2594 if (from == current_frame_info.reg_fp
2595 || from == current_frame_info.reg_save_b0
2596 || from == current_frame_info.reg_save_pr
2597 || from == current_frame_info.reg_save_ar_pfs
2598 || from == current_frame_info.reg_save_ar_unat
2599 || from == current_frame_info.reg_save_ar_lc)
2602 /* Don't use output registers outside the register frame. */
2603 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2606 /* Retain even/oddness on predicate register pairs. */
2607 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2608 return (from & 1) == (to & 1);
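/* Example (hypothetical registers): a compare such as

       cmp.eq p6, p7 = r14, r15

   writes the even/odd pair p6/p7 as complements, and the dependency
   tracking later in this file assumes P and P^1 stay paired; renaming
   p6 to p9 would silently change its complement to p8, so only
   parity-preserving renames are allowed.  */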
2610 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2611 if (from == GR_REG (4) && current_function_calls_setjmp)
2617 /* Target hook for assembling integer objects. Handle word-sized
2618 aligned objects and detect the cases when @fptr is needed. */
2621 ia64_assemble_integer (x, size, aligned_p)
2626 if (size == UNITS_PER_WORD && aligned_p
2627 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2628 && GET_CODE (x) == SYMBOL_REF
2629 && SYMBOL_REF_FLAG (x))
2631 fputs ("\tdata8\t@fptr(", asm_out_file);
2632 output_addr_const (asm_out_file, x);
2633 fputs (")\n", asm_out_file);
2636 return default_assemble_integer (x, size, aligned_p);
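/* For a function symbol "foo" (hypothetical), the @fptr path above
   emits

       data8 @fptr(foo)

   asking the linker to materialize an official function descriptor
   rather than storing the raw code address.  */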
2639 /* Emit the function prologue. */
2642 ia64_output_function_prologue (file, size)
2644 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2646 int mask, grsave, grsave_prev;
2648 if (current_frame_info.need_regstk)
2649 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2650 current_frame_info.n_input_regs,
2651 current_frame_info.n_local_regs,
2652 current_frame_info.n_output_regs,
2653 current_frame_info.n_rotate_regs);
2655 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2658 /* Emit the .prologue directive. */
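/* A sketch of what gets computed below (our reading of the ia64
   unwind conventions, not something stated in this file): the
   directive has the form ".prologue mask, grsave", where mask bit
   0x8 marks rp, 0x4 ar.pfs, 0x2 psp and 0x1 pr as saved in
   consecutive GRs starting at grsave -- matching the b0 / ar.pfs /
   fp / pr order of the tests below.  */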
2661 grsave = grsave_prev = 0;
2662 if (current_frame_info.reg_save_b0 != 0)
2665 grsave = grsave_prev = current_frame_info.reg_save_b0;
2667 if (current_frame_info.reg_save_ar_pfs != 0
2668 && (grsave_prev == 0
2669 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2672 if (grsave_prev == 0)
2673 grsave = current_frame_info.reg_save_ar_pfs;
2674 grsave_prev = current_frame_info.reg_save_ar_pfs;
2676 if (current_frame_info.reg_fp != 0
2677 && (grsave_prev == 0
2678 || current_frame_info.reg_fp == grsave_prev + 1))
2681 if (grsave_prev == 0)
2682 grsave = HARD_FRAME_POINTER_REGNUM;
2683 grsave_prev = current_frame_info.reg_fp;
2685 if (current_frame_info.reg_save_pr != 0
2686 && (grsave_prev == 0
2687 || current_frame_info.reg_save_pr == grsave_prev + 1))
2690 if (grsave_prev == 0)
2691 grsave = current_frame_info.reg_save_pr;
2695 fprintf (file, "\t.prologue %d, %d\n", mask,
2696 ia64_dbx_register_number (grsave));
2698 fputs ("\t.prologue\n", file);
2700 /* Emit a .spill directive, if necessary, to relocate the base of
2701 the register spill area. */
2702 if (current_frame_info.spill_cfa_off != -16)
2703 fprintf (file, "\t.spill %ld\n",
2704 (long) (current_frame_info.spill_cfa_off
2705 + current_frame_info.spill_size));
2708 /* Emit the .body directive at the scheduled end of the prologue. */
2711 ia64_output_function_end_prologue (file)
2714 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2717 fputs ("\t.body\n", file);
2720 /* Emit the function epilogue. */
2723 ia64_output_function_epilogue (file, size)
2724 FILE *file ATTRIBUTE_UNUSED;
2725 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2729 /* Reset from the function's potential modifications. */
2730 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2732 if (current_frame_info.reg_fp)
2734 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2735 reg_names[HARD_FRAME_POINTER_REGNUM]
2736 = reg_names[current_frame_info.reg_fp];
2737 reg_names[current_frame_info.reg_fp] = tmp;
2739 if (! TARGET_REG_NAMES)
2741 for (i = 0; i < current_frame_info.n_input_regs; i++)
2742 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2743 for (i = 0; i < current_frame_info.n_local_regs; i++)
2744 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2745 for (i = 0; i < current_frame_info.n_output_regs; i++)
2746 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2749 current_frame_info.initialized = 0;
2753 ia64_dbx_register_number (regno)
2756 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2757 from its home at loc79 to something inside the register frame. We
2758 must perform the same renumbering here for the debug info. */
2759 if (current_frame_info.reg_fp)
2761 if (regno == HARD_FRAME_POINTER_REGNUM)
2762 regno = current_frame_info.reg_fp;
2763 else if (regno == current_frame_info.reg_fp)
2764 regno = HARD_FRAME_POINTER_REGNUM;
2767 if (IN_REGNO_P (regno))
2768 return 32 + regno - IN_REG (0);
2769 else if (LOC_REGNO_P (regno))
2770 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2771 else if (OUT_REGNO_P (regno))
2772 return (32 + current_frame_info.n_input_regs
2773 + current_frame_info.n_local_regs + regno - OUT_REG (0));
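/* Worked example (hypothetical frame): with 2 input and 3 local
   registers, in0/in1 map to debug register numbers 32/33, loc0-loc2
   to 34-36, and out0 to 37.  */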
2779 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2780 rtx addr, fnaddr, static_chain;
2782 rtx addr_reg, eight = GEN_INT (8);
2784 /* Load up our iterator. */
2785 addr_reg = gen_reg_rtx (Pmode);
2786 emit_move_insn (addr_reg, addr);
2788 /* The first two words are the fake descriptor:
2789 __ia64_trampoline, ADDR+16. */
2790 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2791 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2792 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2794 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2795 copy_to_reg (plus_constant (addr, 16)));
2796 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2798 /* The third word is the target descriptor. */
2799 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2800 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2802 /* The fourth word is the static chain. */
2803 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
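/* Sketch of the finished trampoline, per the four stores above
   (offsets in bytes from ADDR):

       ADDR+ 0: __ia64_trampoline   \ fake function descriptor
       ADDR+ 8: ADDR+16             /
       ADDR+16: FNADDR              -- the real target descriptor word
       ADDR+24: STATIC_CHAIN

   so __ia64_trampoline runs with gp == ADDR+16 and can locate the
   real target and the static chain from there.  */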
2806 /* Do any needed setup for a variadic function. CUM has not been updated
2807 for the last named argument which has type TYPE and mode MODE.
2809 We generate the actual spill instructions during prologue generation. */
2812 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2813 CUMULATIVE_ARGS cum;
2817 int second_time ATTRIBUTE_UNUSED;
2819 /* If this is a stdarg function, then skip the current argument. */
2820 if (! current_function_varargs)
2821 ia64_function_arg_advance (&cum, int_mode, type, 1);
2823 if (cum.words < MAX_ARGUMENT_SLOTS)
2825 int n = MAX_ARGUMENT_SLOTS - cum.words;
2826 *pretend_size = n * UNITS_PER_WORD;
2827 cfun->machine->n_varargs = n;
2831 /* Check whether TYPE is a homogeneous floating point aggregate. If
2832 it is, return the mode of the floating point type that appears
2833 in all leaves. If it is not, return VOIDmode.
2835 An aggregate is a homogeneous floating point aggregate if all
2836 fields/elements in it have the same floating point type (e.g.,
2837 SFmode). 128-bit quad-precision floats are excluded. */
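/* For illustration (hypothetical types, nothing the compiler itself
   declares):

       struct hfa { double x, y; };        -- HFA, element mode DFmode
       struct mix { double x; int tag; };  -- not an HFA: mixed leaves  */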
2839 static enum machine_mode
2840 hfa_element_mode (type, nested)
2844 enum machine_mode element_mode = VOIDmode;
2845 enum machine_mode mode;
2846 enum tree_code code = TREE_CODE (type);
2847 int know_element_mode = 0;
2852 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2853 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2854 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2855 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2859 /* Fortran complex types are supposed to be HFAs, so we need to handle
2860 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2861 types though. */
2863 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2864 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2865 * BITS_PER_UNIT, MODE_FLOAT, 0);
2870 /* ??? Should exclude 128-bit long double here. */
2871 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2872 mode if this is contained within an aggregate. */
2874 return TYPE_MODE (type);
2879 return hfa_element_mode (TREE_TYPE (type), 1);
2883 case QUAL_UNION_TYPE:
2884 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2886 if (TREE_CODE (t) != FIELD_DECL)
2889 mode = hfa_element_mode (TREE_TYPE (t), 1);
2890 if (know_element_mode)
2892 if (mode != element_mode)
2895 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2899 know_element_mode = 1;
2900 element_mode = mode;
2903 return element_mode;
2906 /* If we reach here, we probably have some front-end specific type
2907 that the backend doesn't know about. This can happen via the
2908 aggregate_value_p call in init_function_start. All we can do is
2909 ignore unknown tree types. */
2916 /* Return rtx for register where argument is passed, or zero if it is passed
2917 on the stack. */
2919 /* ??? 128-bit quad-precision floats are always passed in general
2920 registers. */
2923 ia64_function_arg (cum, mode, type, named, incoming)
2924 CUMULATIVE_ARGS *cum;
2925 enum machine_mode mode;
2930 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2931 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2932 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2935 enum machine_mode hfa_mode = VOIDmode;
2937 /* Integer and float arguments larger than 8 bytes start at the next even
2938 boundary. Aggregates larger than 8 bytes start at the next even boundary
2939 if the aggregate has 16 byte alignment. Net effect is that types with
2940 alignment greater than 8 start at the next even boundary. */
2941 /* ??? The ABI does not specify how to handle aggregates with alignment from
2942 9 to 15 bytes, or greater than 16. We handle them all as if they had
2943 16 byte alignment. Such aggregates can occur only if gcc extensions are
2945 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2947 && (cum->words & 1))
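/* Worked example: a 16-byte-aligned aggregate arriving when cum->words
   is odd (say slot 1) gets offset 1 here, so it starts in slot 2 --
   the next even boundary.  (Slot numbers illustrative.)  */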
2950 /* If all argument slots are used, then it must go on the stack. */
2951 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2954 /* Check for and handle homogeneous FP aggregates. */
2956 hfa_mode = hfa_element_mode (type, 0);
2958 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2959 and unprototyped hfas are passed specially. */
2960 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2964 int fp_regs = cum->fp_regs;
2965 int int_regs = cum->words + offset;
2966 int hfa_size = GET_MODE_SIZE (hfa_mode);
2970 /* If prototyped, pass it in FR regs then GR regs.
2971 If not prototyped, pass it in both FR and GR regs.
2973 If this is an SFmode aggregate, then it is possible to run out of
2974 FR regs while GR regs are still left. In that case, we pass the
2975 remaining part in the GR regs. */
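/* Worked example (hypothetical call): a named, prototyped HFA of two
   doubles, with no FP arg registers consumed yet, lands in f8 and f9.
   An SFmode HFA of ten floats exhausts f8-f15 after eight elements,
   and the loop below leaves the final eight bytes for the GR code
   that follows.  */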
2977 /* Fill the FP regs. We do this always. We stop if we reach the end
2978 of the argument, the last FP register, or the last argument slot. */
2980 byte_size = ((mode == BLKmode)
2981 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2982 args_byte_size = int_regs * UNITS_PER_WORD;
2984 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2985 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2987 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2988 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2992 args_byte_size += hfa_size;
2996 /* If no prototype, then the whole thing must go in GR regs. */
2997 if (! cum->prototype)
2999 /* If this is an SFmode aggregate, then we might have some left over
3000 that needs to go in GR regs. */
3001 else if (byte_size != offset)
3002 int_regs += offset / UNITS_PER_WORD;
3004 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3006 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3008 enum machine_mode gr_mode = DImode;
3010 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3011 then this goes in a GR reg left adjusted/little endian, right
3012 adjusted/big endian. */
3013 /* ??? Currently this is handled wrong, because 4-byte hunks are
3014 always right adjusted/little endian. */
3017 /* If we have an even 4 byte hunk because the aggregate is a
3018 multiple of 4 bytes in size, then this goes in a GR reg right
3019 adjusted/little endian. */
3020 else if (byte_size - offset == 4)
3022 /* Complex floats need to have float mode. */
3023 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3026 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3027 gen_rtx_REG (gr_mode, (basereg
3030 offset += GET_MODE_SIZE (gr_mode);
3031 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3032 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3035 /* If we ended up using just one location, just return that one loc. */
3037 return XEXP (loc[0], 0);
3039 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3042 /* Integral types and aggregates go in general registers. If we have run out
3043 FR registers, then FP values must also go in general registers. This can
3044 happen when we have a SFmode HFA. */
3045 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3046 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3047 return gen_rtx_REG (mode, basereg + cum->words + offset);
3049 /* If there is a prototype, then FP values go in a FR register when
3050 named, and in a GR register when unnamed.
3051 else if (cum->prototype)
3054 return gen_rtx_REG (mode, basereg + cum->words + offset);
3056 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3058 /* If there is no prototype, then FP values go in both FR and GR
3059 registers. */
3062 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3063 gen_rtx_REG (mode, (FR_ARG_FIRST
3066 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3068 (basereg + cum->words
3072 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3076 /* Return number of words, at the beginning of the argument, that must be
3077 put in registers. 0 if the argument is entirely in registers or entirely
3078 on the stack. */
3081 ia64_function_arg_partial_nregs (cum, mode, type, named)
3082 CUMULATIVE_ARGS *cum;
3083 enum machine_mode mode;
3085 int named ATTRIBUTE_UNUSED;
3087 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3088 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3092 /* Arguments with alignment larger than 8 bytes start at the next even
3093 boundary. */
3094 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3096 && (cum->words & 1))
3099 /* If all argument slots are used, then it must go on the stack. */
3100 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3103 /* It doesn't matter whether the argument goes in FR or GR regs. If
3104 it fits within the 8 argument slots, then it goes entirely in
3105 registers. If it extends past the last argument slot, then the rest
3106 goes on the stack. */
3108 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3111 return MAX_ARGUMENT_SLOTS - cum->words - offset;
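/* Worked example: a 6-word aggregate arriving at cum->words == 5 with
   no alignment offset extends past slot 7, so we return 8 - 5 - 0 = 3:
   three words travel in registers, the rest on the stack.  */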
3114 /* Update CUM to point after this argument. This is patterned after
3115 ia64_function_arg. */
3118 ia64_function_arg_advance (cum, mode, type, named)
3119 CUMULATIVE_ARGS *cum;
3120 enum machine_mode mode;
3124 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3125 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3128 enum machine_mode hfa_mode = VOIDmode;
3130 /* If all arg slots are already full, then there is nothing to do. */
3131 if (cum->words >= MAX_ARGUMENT_SLOTS)
3134 /* Arguments with alignment larger than 8 bytes start at the next even
3135 boundary. */
3136 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3138 && (cum->words & 1))
3141 cum->words += words + offset;
3143 /* Check for and handle homogeneous FP aggregates. */
3145 hfa_mode = hfa_element_mode (type, 0);
3147 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3148 and unprototyped hfas are passed specially. */
3149 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3151 int fp_regs = cum->fp_regs;
3152 /* This is the original value of cum->words + offset. */
3153 int int_regs = cum->words - words;
3154 int hfa_size = GET_MODE_SIZE (hfa_mode);
3158 /* If prototyped, pass it in FR regs then GR regs.
3159 If not prototyped, pass it in both FR and GR regs.
3161 If this is an SFmode aggregate, then it is possible to run out of
3162 FR regs while GR regs are still left. In that case, we pass the
3163 remaining part in the GR regs. */
3165 /* Fill the FP regs. We do this always. We stop if we reach the end
3166 of the argument, the last FP register, or the last argument slot. */
3168 byte_size = ((mode == BLKmode)
3169 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3170 args_byte_size = int_regs * UNITS_PER_WORD;
3172 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3173 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3176 args_byte_size += hfa_size;
3180 cum->fp_regs = fp_regs;
3183 /* Integral types and aggregates go in general registers. If we have run out
3184 FR registers, then FP values must also go in general registers. This can
3185 happen when we have a SFmode HFA. */
3186 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3187 cum->int_regs = cum->words;
3189 /* If there is a prototype, then FP values go in a FR register when
3190 named, and in a GR register when unnamed.
3191 else if (cum->prototype)
3194 cum->int_regs = cum->words;
3196 /* ??? Complex types should not reach here. */
3197 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3199 /* If there is no prototype, then FP values go in both FR and GR
3200 registers. */
3203 /* ??? Complex types should not reach here. */
3204 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3205 cum->int_regs = cum->words;
3209 /* Variable sized types are passed by reference. */
3210 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3213 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3214 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
3217 int named ATTRIBUTE_UNUSED;
3219 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3222 /* Implement va_start. */
3225 ia64_va_start (stdarg_p, valist, nextarg)
3233 arg_words = current_function_args_info.words;
3238 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3240 nextarg = plus_constant (nextarg, ofs);
3241 std_expand_builtin_va_start (1, valist, nextarg);
3244 /* Implement va_arg. */
3247 ia64_va_arg (valist, type)
3252 /* Variable sized types are passed by reference. */
3253 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3255 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3256 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3259 /* Arguments with alignment larger than 8 bytes start at the next even
3260 boundary. */
3261 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3263 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3264 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3265 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3266 build_int_2 (-2 * UNITS_PER_WORD, -1));
3267 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3268 TREE_SIDE_EFFECTS (t) = 1;
3269 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
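/* Worked arithmetic: with UNITS_PER_WORD == 8 the statements above
   compute valist = (valist + 15) & -16, e.g. 0x1008 -> 0x1010, placing
   the over-aligned argument at the next 16-byte boundary.  */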
3272 return std_expand_builtin_va_arg (valist, type);
3275 /* Return 1 if the function's return value is returned in memory. Return
3276 0 if it is in a register. */
3279 ia64_return_in_memory (valtype)
3282 enum machine_mode mode;
3283 enum machine_mode hfa_mode;
3284 HOST_WIDE_INT byte_size;
3286 mode = TYPE_MODE (valtype);
3287 byte_size = GET_MODE_SIZE (mode);
3288 if (mode == BLKmode)
3290 byte_size = int_size_in_bytes (valtype);
3295 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3297 hfa_mode = hfa_element_mode (valtype, 0);
3298 if (hfa_mode != VOIDmode)
3300 int hfa_size = GET_MODE_SIZE (hfa_mode);
3302 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3307 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3313 /* Return rtx for register that holds the function return value. */
3316 ia64_function_value (valtype, func)
3318 tree func ATTRIBUTE_UNUSED;
3320 enum machine_mode mode;
3321 enum machine_mode hfa_mode;
3323 mode = TYPE_MODE (valtype);
3324 hfa_mode = hfa_element_mode (valtype, 0);
3326 if (hfa_mode != VOIDmode)
3334 hfa_size = GET_MODE_SIZE (hfa_mode);
3335 byte_size = ((mode == BLKmode)
3336 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3338 for (i = 0; offset < byte_size; i++)
3340 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3341 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3347 return XEXP (loc[0], 0);
3349 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3351 else if (FLOAT_TYPE_P (valtype) &&
3352 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3353 return gen_rtx_REG (mode, FR_ARG_FIRST);
3355 return gen_rtx_REG (mode, GR_RET_FIRST);
3358 /* Print a memory address as an operand to reference that memory location. */
3360 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3361 also call this from ia64_print_operand for memory addresses. */
3364 ia64_print_operand_address (stream, address)
3365 FILE * stream ATTRIBUTE_UNUSED;
3366 rtx address ATTRIBUTE_UNUSED;
3370 /* Print an operand to an assembler instruction.
3371 C Swap and print a comparison operator.
3372 D Print an FP comparison operator.
3373 E Print 32 - constant, for SImode shifts as extract.
3374 e Print 64 - constant, for DImode rotates.
3375 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3376 a floating point register emitted normally.
3377 I Invert a predicate register by adding 1.
3378 J Select the proper predicate register for a condition.
3379 j Select the inverse predicate register for a condition.
3380 O Append .acq for volatile load.
3381 P Postincrement of a MEM.
3382 Q Append .rel for volatile store.
3383 S Shift amount for shladd instruction.
3384 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3385 for Intel assembler.
3386 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3387 for Intel assembler.
3388 r Print register name, or constant 0 as r0. HP compatibility for
3391 ia64_print_operand (file, x, code)
3401 /* Handled below. */
3406 enum rtx_code c = swap_condition (GET_CODE (x));
3407 fputs (GET_RTX_NAME (c), file);
3412 switch (GET_CODE (x))
3424 str = GET_RTX_NAME (GET_CODE (x));
3431 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3435 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3439 if (x == CONST0_RTX (GET_MODE (x)))
3440 str = reg_names [FR_REG (0)];
3441 else if (x == CONST1_RTX (GET_MODE (x)))
3442 str = reg_names [FR_REG (1)];
3443 else if (GET_CODE (x) == REG)
3444 str = reg_names [REGNO (x)];
3451 fputs (reg_names [REGNO (x) + 1], file);
3457 unsigned int regno = REGNO (XEXP (x, 0));
3458 if (GET_CODE (x) == EQ)
3462 fputs (reg_names [regno], file);
3467 if (MEM_VOLATILE_P (x))
3468 fputs(".acq", file);
3473 HOST_WIDE_INT value;
3475 switch (GET_CODE (XEXP (x, 0)))
3481 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3482 if (GET_CODE (x) == CONST_INT)
3484 else if (GET_CODE (x) == REG)
3486 fprintf (file, ", %s", reg_names[REGNO (x)]);
3494 value = GET_MODE_SIZE (GET_MODE (x));
3498 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3504 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3509 if (MEM_VOLATILE_P (x))
3510 fputs(".rel", file);
3514 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3518 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3520 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3526 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3528 const char *prefix = "0x";
3529 if (INTVAL (x) & 0x80000000)
3531 fprintf (file, "0xffffffff");
3534 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3540 /* If this operand is the constant zero, write it as register zero.
3541 Any register, zero, or CONST_INT value is OK here. */
3542 if (GET_CODE (x) == REG)
3543 fputs (reg_names[REGNO (x)], file);
3544 else if (x == CONST0_RTX (GET_MODE (x)))
3546 else if (GET_CODE (x) == CONST_INT)
3547 output_addr_const (file, x);
3549 output_operand_lossage ("invalid %%r value");
3556 /* For conditional branches, returns or calls, substitute
3557 sptk, dptk, dpnt, or spnt for %s. */
3558 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3561 int pred_val = INTVAL (XEXP (x, 0));
3563 /* Guess top and bottom 10% statically predicted. */
3564 if (pred_val < REG_BR_PROB_BASE / 50)
3566 else if (pred_val < REG_BR_PROB_BASE / 2)
3568 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3573 else if (GET_CODE (current_output_insn) == CALL_INSN)
3578 fputs (which, file);
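/* Net effect of the thresholds above: predicted branch probabilities
   below 2% hint ".spnt", below 50% ".dpnt", below 98% ".dptk", and
   98% or above ".sptk" (our reading of the static/dynamic
   taken/not-taken hint mnemonics).  */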
3583 x = current_insn_predicate;
3586 unsigned int regno = REGNO (XEXP (x, 0));
3587 if (GET_CODE (x) == EQ)
3589 fprintf (file, "(%s) ", reg_names [regno]);
3594 output_operand_lossage ("ia64_print_operand: unknown code");
3598 switch (GET_CODE (x))
3600 /* This happens for the spill/restore instructions. */
3605 /* ... fall through ... */
3608 fputs (reg_names [REGNO (x)], file);
3613 rtx addr = XEXP (x, 0);
3614 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3615 addr = XEXP (addr, 0);
3616 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3621 output_addr_const (file, x);
3628 /* Calculate the cost of moving data from a register in class FROM to
3629 one in class TO, using MODE. */
3632 ia64_register_move_cost (mode, from, to)
3633 enum machine_mode mode;
3634 enum reg_class from, to;
3636 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3637 if (to == ADDL_REGS)
3639 if (from == ADDL_REGS)
3642 /* All costs are symmetric, so reduce cases by putting the
3643 lower number class as the destination. */
3646 enum reg_class tmp = to;
3647 to = from, from = tmp;
3650 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3651 so that we get secondary memory reloads. Between FR_REGS,
3652 we have to make this at least as expensive as MEMORY_MOVE_COST
3653 to avoid spectacularly poor register class preferencing. */
3656 if (to != GR_REGS || from != GR_REGS)
3657 return MEMORY_MOVE_COST (mode, to, 0);
3665 /* Moving between PR registers takes two insns. */
3666 if (from == PR_REGS)
3668 /* Moving between PR and anything but GR is impossible. */
3669 if (from != GR_REGS)
3670 return MEMORY_MOVE_COST (mode, to, 0);
3674 /* Moving between BR and anything but GR is impossible. */
3675 if (from != GR_REGS && from != GR_AND_BR_REGS)
3676 return MEMORY_MOVE_COST (mode, to, 0);
3681 /* Moving between AR and anything but GR is impossible. */
3682 if (from != GR_REGS)
3683 return MEMORY_MOVE_COST (mode, to, 0);
3688 case GR_AND_FR_REGS:
3689 case GR_AND_BR_REGS:
3700 /* This function returns the register class required for a secondary
3701 register when copying between one of the registers in CLASS, and X,
3702 using MODE. A return value of NO_REGS means that no secondary register
3703 is required. */
3706 ia64_secondary_reload_class (class, mode, x)
3707 enum reg_class class;
3708 enum machine_mode mode ATTRIBUTE_UNUSED;
3713 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3714 regno = true_regnum (x);
3721 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3722 interaction. We end up with two pseudos with overlapping lifetimes
3723 both of which are equiv to the same constant, and both which need
3724 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3725 changes depending on the path length, which means the qty_first_reg
3726 check in make_regs_eqv can give different answers at different times.
3727 At some point I'll probably need a reload_indi pattern to handle
3728 this.
3730 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3731 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3732 non-general registers for good measure. */
3733 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3736 /* This is needed if a pseudo used as a call_operand gets spilled to a
3737 stack slot. */
3738 if (GET_CODE (x) == MEM)
3743 /* Need to go through general registers to get to other class regs. */
3744 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3747 /* This can happen when a paradoxical subreg is an operand to the
3749 /* ??? This shouldn't be necessary after instruction scheduling is
3750 enabled, because paradoxical subregs are not accepted by
3751 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3752 stop the paradoxical subreg stupidity in the *_operand functions
3754 if (GET_CODE (x) == MEM
3755 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3756 || GET_MODE (x) == QImode))
3759 /* This can happen because of the ior/and/etc patterns that accept FP
3760 registers as operands. If the third operand is a constant, then it
3761 needs to be reloaded into a FP register. */
3762 if (GET_CODE (x) == CONST_INT)
3765 /* This can happen because of register elimination in a muldi3 insn.
3766 E.g. `26107 * (unsigned long)&u'. */
3767 if (GET_CODE (x) == PLUS)
3772 /* ??? This happens if we cse/gcse a BImode value across a call,
3773 and the function has a nonlocal goto. This is because global
3774 does not allocate call crossing pseudos to hard registers when
3775 current_function_has_nonlocal_goto is true. This is relatively
3776 common for C++ programs that use exceptions. To reproduce,
3777 return NO_REGS and compile libstdc++. */
3778 if (GET_CODE (x) == MEM)
3781 /* This can happen when we take a BImode subreg of a DImode value,
3782 and that DImode value winds up in some non-GR register. */
3783 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3788 /* Since we have no offsettable memory addresses, we need a temporary
3789 to hold the address of the second word. */
3802 /* Emit text to declare externally defined variables and functions, because
3803 the Intel assembler does not support undefined externals. */
3806 ia64_asm_output_external (file, decl, name)
3811 int save_referenced;
3813 /* GNU as does not need anything here. */
3817 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3818 the linker when we do this, so we need to be careful not to do this for
3819 builtin functions which have no library equivalent. Unfortunately, we
3820 can't tell here whether or not a function will actually be called by
3821 expand_expr, so we pull in library functions even if we may not need
3822 them. */
3823 if (! strcmp (name, "__builtin_next_arg")
3824 || ! strcmp (name, "alloca")
3825 || ! strcmp (name, "__builtin_constant_p")
3826 || ! strcmp (name, "__builtin_args_info"))
3829 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3830 restore it. */
3831 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3832 if (TREE_CODE (decl) == FUNCTION_DECL)
3834 fprintf (file, "%s", TYPE_ASM_OP);
3835 assemble_name (file, name);
3837 fprintf (file, TYPE_OPERAND_FMT, "function");
3840 ASM_GLOBALIZE_LABEL (file, name);
3841 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3844 /* Parse the -mfixed-range= option string. */
3847 fix_range (const_str)
3848 const char *const_str;
3851 char *str, *dash, *comma;
3853 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3854 REG2 are either register names or register numbers. The effect
3855 of this option is to mark the registers in the range from REG1 to
3856 REG2 as ``fixed'' so they won't be used by the compiler. This is
3857 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3859 i = strlen (const_str);
3860 str = (char *) alloca (i + 1);
3861 memcpy (str, const_str, i + 1);
3865 dash = strchr (str, '-');
3868 warning ("value of -mfixed-range must have form REG1-REG2");
3873 comma = strchr (dash + 1, ',');
3877 first = decode_reg_name (str);
3880 warning ("unknown register name: %s", str);
3884 last = decode_reg_name (dash + 1);
3887 warning ("unknown register name: %s", dash + 1);
3895 warning ("%s-%s is an empty range", str, dash + 1);
3899 for (i = first; i <= last; ++i)
3900 fixed_regs[i] = call_used_regs[i] = 1;
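/* Usage sketch: "-mfixed-range=f32-f127" reaches this loop with first
   and last set to the register numbers of f32 and f127, marking every
   register in the range fixed and call-used so the allocator never
   touches it.  */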
3910 /* Called to register all of our global variables with the garbage
3914 ia64_add_gc_roots ()
3916 ggc_add_rtx_root (&ia64_compare_op0, 1);
3917 ggc_add_rtx_root (&ia64_compare_op1, 1);
3921 ia64_init_machine_status (p)
3925 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3929 ia64_mark_machine_status (p)
3932 struct machine_function *machine = p->machine;
3936 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3937 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3938 ggc_mark_rtx (machine->ia64_gp_save);
3943 ia64_free_machine_status (p)
3950 /* Handle TARGET_OPTIONS switches. */
3953 ia64_override_options ()
3955 if (TARGET_AUTO_PIC)
3956 target_flags |= MASK_CONST_GP;
3958 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3960 warning ("cannot optimize division for both latency and throughput");
3961 target_flags &= ~MASK_INLINE_DIV_THR;
3964 if (ia64_fixed_range_string)
3965 fix_range (ia64_fixed_range_string);
3967 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3968 flag_schedule_insns_after_reload = 0;
3970 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3972 init_machine_status = ia64_init_machine_status;
3973 mark_machine_status = ia64_mark_machine_status;
3974 free_machine_status = ia64_free_machine_status;
3976 ia64_add_gc_roots ();
3979 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3980 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3981 static enum attr_type ia64_safe_type PARAMS((rtx));
3983 static enum attr_itanium_requires_unit0
3984 ia64_safe_itanium_requires_unit0 (insn)
3987 if (recog_memoized (insn) >= 0)
3988 return get_attr_itanium_requires_unit0 (insn);
3990 return ITANIUM_REQUIRES_UNIT0_NO;
3993 static enum attr_itanium_class
3994 ia64_safe_itanium_class (insn)
3997 if (recog_memoized (insn) >= 0)
3998 return get_attr_itanium_class (insn);
4000 return ITANIUM_CLASS_UNKNOWN;
4003 static enum attr_type
4004 ia64_safe_type (insn)
4007 if (recog_memoized (insn) >= 0)
4008 return get_attr_type (insn);
4010 return TYPE_UNKNOWN;
4013 /* The following collection of routines emit instruction group stop bits as
4014 necessary to avoid dependencies. */
4016 /* Need to track some additional registers as far as serialization is
4017 concerned so we can properly handle br.call and br.ret. We could
4018 make these registers visible to gcc, but since these registers are
4019 never explicitly used in gcc generated code, it seems wasteful to
4020 do so (plus it would make the call and return patterns needlessly
4021 complex). */
4022 #define REG_GP (GR_REG (1))
4023 #define REG_RP (BR_REG (0))
4024 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4025 /* This is used for volatile asms which may require a stop bit immediately
4026 before and after them. */
4027 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4028 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4029 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4031 /* For each register, we keep track of how it has been written in the
4032 current instruction group.
4034 If a register is written unconditionally (no qualifying predicate),
4035 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4037 If a register is written if its qualifying predicate P is true, we
4038 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4039 may be written again by the complement of P (P^1) and when this happens,
4040 WRITE_COUNT gets set to 2.
4042 The result of this is that whenever an insn attempts to write a register
4043 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4045 If a predicate register is written by a floating-point insn, we set
4046 WRITTEN_BY_FP to true.
4048 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4049 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
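/* Example run of the state machine above (hypothetical insns):

       (p6) mov r14 = r15     -- r14: WRITE_COUNT 1, FIRST_PRED p6
       (p7) mov r14 = r16     -- p7 == p6^1: allowed, WRITE_COUNT -> 2
       (p8) mov r14 = r17     -- WRITE_COUNT already 2: barrier first  */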
4051 struct reg_write_state
4053 unsigned int write_count : 2;
4054 unsigned int first_pred : 16;
4055 unsigned int written_by_fp : 1;
4056 unsigned int written_by_and : 1;
4057 unsigned int written_by_or : 1;
4060 /* Cumulative info for the current instruction group. */
4061 struct reg_write_state rws_sum[NUM_REGS];
4062 /* Info for the current instruction. This gets copied to rws_sum after a
4063 stop bit is emitted. */
4064 struct reg_write_state rws_insn[NUM_REGS];
4066 /* Indicates whether this is the first instruction after a stop bit,
4067 in which case we don't need another stop bit. Without this, we hit
4068 the abort in ia64_variable_issue when scheduling an alloc. */
4069 static int first_instruction;
4071 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4072 RTL for one instruction. */
4075 unsigned int is_write : 1; /* Is register being written? */
4076 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4077 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4078 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4079 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4080 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4083 static void rws_update PARAMS ((struct reg_write_state *, int,
4084 struct reg_flags, int));
4085 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4086 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4087 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4088 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4089 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4090 static void init_insn_group_barriers PARAMS ((void));
4091 static int group_barrier_needed_p PARAMS ((rtx));
4092 static int safe_group_barrier_needed_p PARAMS ((rtx));
4094 /* Update *RWS for REGNO, which is being written by the current instruction,
4095 with predicate PRED, and associated register flags in FLAGS. */
4098 rws_update (rws, regno, flags, pred)
4099 struct reg_write_state *rws;
4101 struct reg_flags flags;
4105 rws[regno].write_count++;
4107 rws[regno].write_count = 2;
4108 rws[regno].written_by_fp |= flags.is_fp;
4109 /* ??? Not tracking and/or across differing predicates. */
4110 rws[regno].written_by_and = flags.is_and;
4111 rws[regno].written_by_or = flags.is_or;
4112 rws[regno].first_pred = pred;
4115 /* Handle an access to register REGNO of type FLAGS using predicate register
4116 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4117 a dependency with an earlier instruction in the same group. */
4120 rws_access_regno (regno, flags, pred)
4122 struct reg_flags flags;
4125 int need_barrier = 0;
4127 if (regno >= NUM_REGS)
4130 if (! PR_REGNO_P (regno))
4131 flags.is_and = flags.is_or = 0;
4137 /* One insn writes same reg multiple times? */
4138 if (rws_insn[regno].write_count > 0)
4141 /* Update info for current instruction. */
4142 rws_update (rws_insn, regno, flags, pred);
4143 write_count = rws_sum[regno].write_count;
4145 switch (write_count)
4148 /* The register has not been written yet. */
4149 rws_update (rws_sum, regno, flags, pred);
4153 /* The register has been written via a predicate. If this is
4154 not a complementary predicate, then we need a barrier. */
4155 /* ??? This assumes that P and P+1 are always complementary
4156 predicates for P even. */
4157 if (flags.is_and && rws_sum[regno].written_by_and)
4159 else if (flags.is_or && rws_sum[regno].written_by_or)
4161 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4163 rws_update (rws_sum, regno, flags, pred);
4167 /* The register has been unconditionally written already. We
4168 need a barrier. */
4169 if (flags.is_and && rws_sum[regno].written_by_and)
4171 else if (flags.is_or && rws_sum[regno].written_by_or)
4175 rws_sum[regno].written_by_and = flags.is_and;
4176 rws_sum[regno].written_by_or = flags.is_or;
4185 if (flags.is_branch)
4187 /* Branches have several RAW exceptions that allow us to avoid
4188 barriers. */
4190 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4191 /* RAW dependencies on branch regs are permissible as long
4192 as the writer is a non-branch instruction. Since we
4193 never generate code that uses a branch register written
4194 by a branch instruction, handling this case is
4198 if (REGNO_REG_CLASS (regno) == PR_REGS
4199 && ! rws_sum[regno].written_by_fp)
4200 /* The predicates of a branch are available within the
4201 same insn group as long as the predicate was written by
4202 something other than a floating-point instruction. */
4206 if (flags.is_and && rws_sum[regno].written_by_and)
4208 if (flags.is_or && rws_sum[regno].written_by_or)
4211 switch (rws_sum[regno].write_count)
4214 /* The register has not been written yet. */
4218 /* The register has been written via a predicate. If this is
4219 not a complementary predicate, then we need a barrier. */
4220 /* ??? This assumes that P and P+1 are always complementary
4221 predicates for P even. */
4222 if ((rws_sum[regno].first_pred ^ 1) != pred)
4227 /* The register has been unconditionally written already. We
4228 need a barrier. */
4237 return need_barrier;
4241 rws_access_reg (reg, flags, pred)
4243 struct reg_flags flags;
4246 int regno = REGNO (reg);
4247 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4250 return rws_access_regno (regno, flags, pred);
4253 int need_barrier = 0;
4255 need_barrier |= rws_access_regno (regno + n, flags, pred);
4256 return need_barrier;
4260 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4261 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4264 update_set_flags (x, pflags, ppred, pcond)
4266 struct reg_flags *pflags;
4270 rtx src = SET_SRC (x);
4274 switch (GET_CODE (src))
4280 if (SET_DEST (x) == pc_rtx)
4281 /* X is a conditional branch. */
4285 int is_complemented = 0;
4287 /* X is a conditional move. */
4288 rtx cond = XEXP (src, 0);
4289 if (GET_CODE (cond) == EQ)
4290 is_complemented = 1;
4291 cond = XEXP (cond, 0);
4292 if (GET_CODE (cond) != REG
4293 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4296 if (XEXP (src, 1) == SET_DEST (x)
4297 || XEXP (src, 2) == SET_DEST (x))
4299 /* X is a conditional move that conditionally writes the
4300 destination. */
4302 /* We need another complement in this case. */
4303 if (XEXP (src, 1) == SET_DEST (x))
4304 is_complemented = ! is_complemented;
4306 *ppred = REGNO (cond);
4307 if (is_complemented)
4311 /* ??? If this is a conditional write to the dest, then this
4312 instruction does not actually read one source. This probably
4313 doesn't matter, because that source is also the dest. */
4314 /* ??? Multiple writes to predicate registers are allowed
4315 if they are all AND type compares, or if they are all OR
4316 type compares. We do not generate such instructions
4317 currently. */
4319 /* ... fall through ... */
4322 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4323 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4324 /* Set pflags->is_fp to 1 so that we know we're dealing
4325 with a floating point comparison when processing the
4326 destination of the SET. */
4329 /* Discover if this is a parallel comparison. We only handle
4330 and.orcm and or.andcm at present, since we must retain a
4331 strict inverse on the predicate pair. */
4332 else if (GET_CODE (src) == AND)
4334 else if (GET_CODE (src) == IOR)
4341 /* Subroutine of rtx_needs_barrier; this function determines whether the
4342 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4343 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4344 for this insn. */
4347 set_src_needs_barrier (x, flags, pred, cond)
4349 struct reg_flags flags;
4353 int need_barrier = 0;
4355 rtx src = SET_SRC (x);
4357 if (GET_CODE (src) == CALL)
4358 /* We don't need to worry about the result registers that
4359 get written by subroutine call. */
4360 return rtx_needs_barrier (src, flags, pred);
4361 else if (SET_DEST (x) == pc_rtx)
4363 /* X is a conditional branch. */
4364 /* ??? This seems redundant, as the caller sets this bit for
4365 all JUMP_INSNs. */
4366 flags.is_branch = 1;
4367 return rtx_needs_barrier (src, flags, pred);
4370 need_barrier = rtx_needs_barrier (src, flags, pred);
4372 /* This instruction unconditionally uses a predicate register. */
4374 need_barrier |= rws_access_reg (cond, flags, 0);
4377 if (GET_CODE (dst) == ZERO_EXTRACT)
4379 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4380 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4381 dst = XEXP (dst, 0);
4383 return need_barrier;
4386 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4387 Return 1 if this access creates a dependency with an earlier instruction
4388 in the same group. */
4391 rtx_needs_barrier (x, flags, pred)
4393 struct reg_flags flags;
4397 int is_complemented = 0;
4398 int need_barrier = 0;
4399 const char *format_ptr;
4400 struct reg_flags new_flags;
4408 switch (GET_CODE (x))
4411 update_set_flags (x, &new_flags, &pred, &cond);
4412 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4413 if (GET_CODE (SET_SRC (x)) != CALL)
4415 new_flags.is_write = 1;
4416 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4421 new_flags.is_write = 0;
4422 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4424 /* Avoid multiple register writes, in case this is a pattern with
4425 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4426 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4428 new_flags.is_write = 1;
4429 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4430 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4431 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4436 /* X is a predicated instruction. */
4438 cond = COND_EXEC_TEST (x);
4441 need_barrier = rtx_needs_barrier (cond, flags, 0);
4443 if (GET_CODE (cond) == EQ)
4444 is_complemented = 1;
4445 cond = XEXP (cond, 0);
4446 if (GET_CODE (cond) != REG
4447 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4449 pred = REGNO (cond);
4450 if (is_complemented)
4453 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4454 return need_barrier;
4458 /* Clobber & use are for earlier compiler-phases only. */
4463 /* We always emit stop bits for traditional asms. We emit stop bits
4464 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4465 if (GET_CODE (x) != ASM_OPERANDS
4466 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4468 /* Avoid writing the register multiple times if we have multiple
4469 asm outputs. This avoids an abort in rws_access_reg. */
4470 if (! rws_insn[REG_VOLATILE].write_count)
4472 new_flags.is_write = 1;
4473 rws_access_regno (REG_VOLATILE, new_flags, pred);
4478 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4479 We cannot just fall through here, since we would then be confused
4480 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4481 a traditional asm, unlike its normal usage. */
4483 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4484 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4489 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4491 rtx pat = XVECEXP (x, 0, i);
4492 if (GET_CODE (pat) == SET)
4494 update_set_flags (pat, &new_flags, &pred, &cond);
4495 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4497 else if (GET_CODE (pat) == USE
4498 || GET_CODE (pat) == CALL
4499 || GET_CODE (pat) == ASM_OPERANDS)
4500 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4501 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4504 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4506 rtx pat = XVECEXP (x, 0, i);
4507 if (GET_CODE (pat) == SET)
4509 if (GET_CODE (SET_SRC (pat)) != CALL)
4511 new_flags.is_write = 1;
4512 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4516 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4517 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4525 if (REGNO (x) == AR_UNAT_REGNUM)
4527 for (i = 0; i < 64; ++i)
4528 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4531 need_barrier = rws_access_reg (x, flags, pred);
4535 /* Find the regs used in memory address computation. */
4536 new_flags.is_write = 0;
4537 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4540 case CONST_INT: case CONST_DOUBLE:
4541 case SYMBOL_REF: case LABEL_REF: case CONST:
4544 /* Operators with side-effects. */
4545 case POST_INC: case POST_DEC:
4546 if (GET_CODE (XEXP (x, 0)) != REG)
4549 new_flags.is_write = 0;
4550 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4551 new_flags.is_write = 1;
4552 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4556 if (GET_CODE (XEXP (x, 0)) != REG)
4559 new_flags.is_write = 0;
4560 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4561 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4562 new_flags.is_write = 1;
4563 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4566 /* Handle common unary and binary ops for efficiency. */
4567 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4568 case MOD: case UDIV: case UMOD: case AND: case IOR:
4569 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4570 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4571 case NE: case EQ: case GE: case GT: case LE:
4572 case LT: case GEU: case GTU: case LEU: case LTU:
4573 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4574 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4577 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4578 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4579 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4580 case SQRT: case FFS:
4581 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4585 switch (XINT (x, 1))
4587 case UNSPEC_GR_SPILL:
4588 case UNSPEC_GR_RESTORE:
4590 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4591 HOST_WIDE_INT bit = (offset >> 3) & 63;
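/* Added commentary, not part of the original source: an 8-byte spill
   slot maps to ar.unat bit (address >> 3) & 63, which is what the BIT
   computation above mirrors for the spill offset. */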
4593 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4594 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
4595 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4600 case UNSPEC_FR_SPILL:
4601 case UNSPEC_FR_RESTORE:
4603 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4606 case UNSPEC_PRED_REL_MUTEX:
4607 case UNSPEC_PIC_CALL:
4609 case UNSPEC_FETCHADD_ACQ:
4610 case UNSPEC_BSP_VALUE:
4611 case UNSPEC_FLUSHRS:
4612 case UNSPEC_BUNDLE_SELECTOR:
4616 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4619 case UNSPEC_FR_RECIP_APPROX:
4620 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4621 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4624 case UNSPEC_CMPXCHG_ACQ:
4625 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4626 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4634 case UNSPEC_VOLATILE:
4635 switch (XINT (x, 1))
4638 /* Alloc must always be the first instruction of a group.
4639 We force this by always returning true. */
4640 /* ??? We might get better scheduling if we explicitly check for
4641 input/local/output register dependencies, and modify the
4642 scheduler so that alloc is always reordered to the start of
4643 the current group. We could then eliminate all of the
4644 first_instruction code. */
4645 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4647 new_flags.is_write = 1;
4648 rws_access_regno (REG_AR_CFM, new_flags, pred);
4651 case UNSPECV_SET_BSP:
4655 case UNSPECV_BLOCKAGE:
4656 case UNSPECV_INSN_GROUP_BARRIER:
4658 case UNSPECV_PSAC_ALL:
4659 case UNSPECV_PSAC_NORMAL:
4668 new_flags.is_write = 0;
4669 need_barrier = rws_access_regno (REG_RP, flags, pred);
4670 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4672 new_flags.is_write = 1;
4673 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4674 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4678 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4679 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4680 switch (format_ptr[i])
4682 case '0': /* unused field */
4683 case 'i': /* integer */
4684 case 'n': /* note */
4685 case 'w': /* wide integer */
4686 case 's': /* pointer to string */
4687 case 'S': /* optional pointer to string */
4691 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4696 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4697 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4706 return need_barrier;
4709 /* Clear out the state for group_barrier_needed_p at the start of a
4710 sequence of insns. */
4713 init_insn_group_barriers ()
4715 memset (rws_sum, 0, sizeof (rws_sum));
4716 first_instruction = 1;
4719 /* Given the current state, recorded by previous calls to this function,
4720 determine whether a group barrier (a stop bit) is necessary before INSN.
4721 Return nonzero if so. */
4724 group_barrier_needed_p (insn)
4728 int need_barrier = 0;
4729 struct reg_flags flags;
4731 memset (&flags, 0, sizeof (flags));
4732 switch (GET_CODE (insn))
4738 /* A barrier doesn't imply an instruction group boundary. */
4742 memset (rws_insn, 0, sizeof (rws_insn));
4746 flags.is_branch = 1;
4747 flags.is_sibcall = SIBLING_CALL_P (insn);
4748 memset (rws_insn, 0, sizeof (rws_insn));
4750 /* Don't bundle a call following another call. */
4751 if ((pat = prev_active_insn (insn))
4752 && GET_CODE (pat) == CALL_INSN)
4758 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4762 flags.is_branch = 1;
4764 /* Don't bundle a jump following a call. */
4765 if ((pat = prev_active_insn (insn))
4766 && GET_CODE (pat) == CALL_INSN)
4774 if (GET_CODE (PATTERN (insn)) == USE
4775 || GET_CODE (PATTERN (insn)) == CLOBBER)
4776 /* Don't care about USE and CLOBBER "insns"---those are used to
4777 indicate to the optimizer that it shouldn't get rid of
4778 certain operations. */
4781 pat = PATTERN (insn);
4783 /* Ug. Hack hacks hacked elsewhere. */
4784 switch (recog_memoized (insn))
4786 /* We play dependency tricks with the epilogue in order
4787 to get proper schedules. Undo this for dv analysis. */
4788 case CODE_FOR_epilogue_deallocate_stack:
4789 case CODE_FOR_prologue_allocate_stack:
4790 pat = XVECEXP (pat, 0, 0);
4793 /* The pattern we use for br.cloop confuses the code above.
4794 The second element of the vector is representative. */
4795 case CODE_FOR_doloop_end_internal:
4796 pat = XVECEXP (pat, 0, 1);
4799 /* Doesn't generate code. */
4800 case CODE_FOR_pred_rel_mutex:
4801 case CODE_FOR_prologue_use:
4808 memset (rws_insn, 0, sizeof (rws_insn));
4809 need_barrier = rtx_needs_barrier (pat, flags, 0);
4811 /* Check to see if the previous instruction was a volatile asm. */
4814 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4821 if (first_instruction)
4824 first_instruction = 0;
4827 return need_barrier;
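/* Illustrative sketch (added commentary, not part of the original
   source; register numbers are hypothetical): a group barrier is the
   ";;" stop bit in the assembly output. A read-after-write such as

       add r14 = r15, r16
       add r17 = r14, r18      // reads r14 in the same group: invalid

   must be split into two groups:

       add r14 = r15, r16 ;;
       add r17 = r14, r18  */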
4830 /* Like group_barrier_needed_p, but do not clobber the current state. */
4833 safe_group_barrier_needed_p (insn)
4836 struct reg_write_state rws_saved[NUM_REGS];
4837 int saved_first_instruction;
4840 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4841 saved_first_instruction = first_instruction;
4843 t = group_barrier_needed_p (insn);
4845 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4846 first_instruction = saved_first_instruction;
4851 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4852 as necessary to eliminate dependencies. This function assumes that
4853 a final instruction scheduling pass has been run which has already
4854 inserted most of the necessary stop bits. This function only inserts
4855 new ones at basic block boundaries, since these are invisible to the scheduler. */
4859 emit_insn_group_barriers (dump, insns)
4865 int insns_since_last_label = 0;
4867 init_insn_group_barriers ();
4869 for (insn = insns; insn; insn = NEXT_INSN (insn))
4871 if (GET_CODE (insn) == CODE_LABEL)
4873 if (insns_since_last_label)
4875 insns_since_last_label = 0;
4877 else if (GET_CODE (insn) == NOTE
4878 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4880 if (insns_since_last_label)
4882 insns_since_last_label = 0;
4884 else if (GET_CODE (insn) == INSN
4885 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4886 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
4888 init_insn_group_barriers ();
4891 else if (INSN_P (insn))
4893 insns_since_last_label = 1;
4895 if (group_barrier_needed_p (insn))
4900 fprintf (dump, "Emitting stop before label %d\n",
4901 INSN_UID (last_label));
4902 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4905 init_insn_group_barriers ();
4913 /* Like emit_insn_group_barriers, but used when no final scheduling pass
4914 has been run. This function has to emit all necessary group barriers. */
4917 emit_all_insn_group_barriers (dump, insns)
4918 FILE *dump ATTRIBUTE_UNUSED;
4923 init_insn_group_barriers ();
4925 for (insn = insns; insn; insn = NEXT_INSN (insn))
4927 if (GET_CODE (insn) == BARRIER)
4929 rtx last = prev_active_insn (insn);
4933 if (GET_CODE (last) == JUMP_INSN
4934 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
4935 last = prev_active_insn (last);
4936 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
4937 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
4939 init_insn_group_barriers ();
4941 else if (INSN_P (insn))
4943 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
4944 init_insn_group_barriers ();
4945 else if (group_barrier_needed_p (insn))
4947 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4948 init_insn_group_barriers ();
4949 group_barrier_needed_p (insn);
4955 static int errata_find_address_regs PARAMS ((rtx *, void *));
4956 static void errata_emit_nops PARAMS ((rtx));
4957 static void fixup_errata PARAMS ((void));
4959 /* This structure is used to track some details about the previous insn
4960 groups so we can determine if it may be necessary to insert NOPs to
4961 work around hardware errata. */
4964 HARD_REG_SET p_reg_set;
4965 HARD_REG_SET gr_reg_conditionally_set;
4968 /* Index into the last_group array. */
4969 static int group_idx;
4971 /* Called through for_each_rtx; determines if a hard register that was
4972 conditionally set in the previous group is used as an address register.
4973 It ensures that for_each_rtx returns 1 in that case. */
4975 errata_find_address_regs (xp, data)
4977 void *data ATTRIBUTE_UNUSED;
4980 if (GET_CODE (x) != MEM)
4983 if (GET_CODE (x) == POST_MODIFY)
4985 if (GET_CODE (x) == REG)
4987 struct group *prev_group = last_group + (group_idx ^ 1);
4988 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4996 /* Called for each insn; this function keeps track of the state in
4997 last_group and emits additional NOPs if necessary to work around
4998 an Itanium A/B step erratum. */
5000 errata_emit_nops (insn)
5003 struct group *this_group = last_group + group_idx;
5004 struct group *prev_group = last_group + (group_idx ^ 1);
5005 rtx pat = PATTERN (insn);
5006 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5007 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5008 enum attr_type type;
5011 if (GET_CODE (real_pat) == USE
5012 || GET_CODE (real_pat) == CLOBBER
5013 || GET_CODE (real_pat) == ASM_INPUT
5014 || GET_CODE (real_pat) == ADDR_VEC
5015 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5016 || asm_noperands (PATTERN (insn)) >= 0)
5019 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate part of its logic here. */
5022 if (GET_CODE (set) == PARALLEL)
5025 set = XVECEXP (real_pat, 0, 0);
5026 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5027 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5028 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5035 if (set && GET_CODE (set) != SET)
5038 type = get_attr_type (insn);
5041 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5042 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5044 if ((type == TYPE_M || type == TYPE_A) && cond && set
5045 && REG_P (SET_DEST (set))
5046 && GET_CODE (SET_SRC (set)) != PLUS
5047 && GET_CODE (SET_SRC (set)) != MINUS
5048 && (GET_CODE (SET_SRC (set)) != ASHIFT
5049 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5050 && (GET_CODE (SET_SRC (set)) != MEM
5051 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5052 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5054 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5055 || ! REG_P (XEXP (cond, 0)))
5058 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5059 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5061 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5063 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5064 emit_insn_before (gen_nop (), insn);
5065 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5067 memset (last_group, 0, sizeof last_group);
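/* Illustrative sketch (added commentary, not part of the original
   source; the load shown is hypothetical): the workaround emitted
   above brackets a NOP with stop bits so the offending access starts
   a fresh instruction group:

       ;;
       nop 0
       ;;
       ld8 r14 = [r15]    // address reg conditionally set last group  */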
5071 /* Emit extra nops if they are required to work around hardware errata. */
5078 if (! TARGET_B_STEP)
5082 memset (last_group, 0, sizeof last_group);
5084 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5089 if (ia64_safe_type (insn) == TYPE_S)
5092 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5095 errata_emit_nops (insn);
5099 /* Instruction scheduling support. */
5100 /* Describe one bundle. */
5104 /* Zero if there's no possibility of a stop in this bundle other than
5105 at the end, otherwise the position of the optional stop bit. */
5107 /* The types of the three slots. */
5108 enum attr_type t[3];
5109 /* The pseudo op to be emitted into the assembler output. */
5113 #define NR_BUNDLES 10
5115 /* A list of all available bundles. */
5117 static const struct bundle bundle[NR_BUNDLES] =
5119 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5120 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5121 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5122 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5123 #if NR_BUNDLES == 10
5124 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5125 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5127 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5128 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5129 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5130 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5131 it matches an L type insn. Otherwise we'll try to generate L type nops. */
5133 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
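/* Illustrative sketch (added commentary, not part of the original
   source; operands are hypothetical): each name above is the template
   selector pseudo-op emitted into the assembly, e.g. an .mfi bundle
   holds one M, one F and one I slot insn:

       .mfi
       ld8 r14 = [r15]          // M slot
       fma f6 = f7, f8, f9      // F slot
       add r16 = r17, r18       // I slot  */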
5136 /* Describe a packet of instructions. Packets consist of two bundles that
5137 are visible to the hardware in one scheduling window. */
5141 const struct bundle *t1, *t2;
5142 /* Precomputed value of the first split issue in this packet if a cycle
5143 starts at its beginning. */
5145 /* For convenience, the insn types are replicated here so we don't have
5146 to go through T1 and T2 all the time. */
5147 enum attr_type t[6];
5150 /* An array containing all possible packets. */
5151 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5152 static struct ia64_packet packets[NR_PACKETS];
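/* Added commentary, not part of the original source: a packet pairs
   two bundle templates; e.g. { .mii, .mfb } yields the slot types
   M I I M F B for one two-bundle issue window, which is how the
   six-entry T array above gets filled. */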
5154 /* Map attr_type to a string with the name. */
5156 static const char *const type_names[] =
5158 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5161 /* Nonzero if we should insert stop bits into the schedule. */
5162 int ia64_final_schedule = 0;
5164 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5165 static rtx ia64_single_set PARAMS ((rtx));
5166 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5167 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5168 static void maybe_rotate PARAMS ((FILE *));
5169 static void finish_last_head PARAMS ((FILE *, int));
5170 static void rotate_one_bundle PARAMS ((FILE *));
5171 static void rotate_two_bundles PARAMS ((FILE *));
5172 static void nop_cycles_until PARAMS ((int, FILE *));
5173 static void cycle_end_fill_slots PARAMS ((FILE *));
5174 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5175 static int get_split PARAMS ((const struct ia64_packet *, int));
5176 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5177 const struct ia64_packet *, int));
5178 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5179 rtx *, enum attr_type *, int));
5180 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5181 static void dump_current_packet PARAMS ((FILE *));
5182 static void schedule_stop PARAMS ((FILE *));
5183 static rtx gen_nop_type PARAMS ((enum attr_type));
5184 static void ia64_emit_nops PARAMS ((void));
5186 /* Map a bundle number to its pseudo-op. */
5192 return bundle[b].name;
5195 /* Compute the slot which will cause a split issue in packet P if the
5196 current cycle begins at slot BEGIN. */
5199 itanium_split_issue (p, begin)
5200 const struct ia64_packet *p;
5203 int type_count[TYPE_S];
5209 /* Always split before and after MMF. */
5210 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5212 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5214 /* Always split after MBB and BBB. */
5215 if (p->t[1] == TYPE_B)
5217 /* Split after first bundle in MIB BBB combination. */
5218 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5222 memset (type_count, 0, sizeof type_count);
5223 for (i = begin; i < split; i++)
5225 enum attr_type t0 = p->t[i];
5226 /* An MLX bundle reserves the same units as an MFI bundle. */
5227 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5228 : t0 == TYPE_X ? TYPE_I
5231 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5232 2 integer per cycle. */
5233 int max = (t == TYPE_B ? 3 : 2);
5234 if (type_count[t] == max)
5242 /* Return the maximum number of instructions a cpu can issue. */
5250 /* Helper function - like single_set, but look inside COND_EXEC. */
5253 ia64_single_set (insn)
5256 rtx x = PATTERN (insn), ret;
5257 if (GET_CODE (x) == COND_EXEC)
5258 x = COND_EXEC_CODE (x);
5259 if (GET_CODE (x) == SET)
5262 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5263 Although they are not classical single sets, the second set is there just
5264 to protect the first one from moving past FP-relative stack accesses. */
5265 switch (recog_memoized (insn))
5267 case CODE_FOR_prologue_allocate_stack:
5268 case CODE_FOR_epilogue_deallocate_stack:
5269 ret = XVECEXP (x, 0, 0);
5273 ret = single_set_2 (insn, x);
5280 /* Adjust the cost of a scheduling dependency. Return the new cost of
5281 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5284 ia64_adjust_cost (insn, link, dep_insn, cost)
5285 rtx insn, link, dep_insn;
5288 enum attr_type dep_type;
5289 enum attr_itanium_class dep_class;
5290 enum attr_itanium_class insn_class;
5291 rtx dep_set, set, src, addr;
5293 if (GET_CODE (PATTERN (insn)) == CLOBBER
5294 || GET_CODE (PATTERN (insn)) == USE
5295 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5296 || GET_CODE (PATTERN (dep_insn)) == USE
5297 /* @@@ Not accurate for indirect calls. */
5298 || GET_CODE (insn) == CALL_INSN
5299 || ia64_safe_type (insn) == TYPE_S)
5302 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5303 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5306 dep_type = ia64_safe_type (dep_insn);
5307 dep_class = ia64_safe_itanium_class (dep_insn);
5308 insn_class = ia64_safe_itanium_class (insn);
5310 /* Compares that feed a conditional branch can execute in the same cycle. */
5312 dep_set = ia64_single_set (dep_insn);
5313 set = ia64_single_set (insn);
5315 if (dep_type != TYPE_F
5317 && GET_CODE (SET_DEST (dep_set)) == REG
5318 && PR_REG (REGNO (SET_DEST (dep_set)))
5319 && GET_CODE (insn) == JUMP_INSN)
5322 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5324 /* ??? Can't find any information in the documentation about whether
5328 a store followed by a load splits issue. Assume it doesn't. */
5332 src = set ? SET_SRC (set) : 0;
5336 if (GET_CODE (SET_DEST (set)) == MEM)
5337 addr = XEXP (SET_DEST (set), 0);
5338 else if (GET_CODE (SET_DEST (set)) == SUBREG
5339 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5340 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5344 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5345 addr = XVECEXP (addr, 0, 0);
5346 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5347 addr = XEXP (addr, 0);
5348 if (GET_CODE (addr) == MEM)
5349 addr = XEXP (addr, 0);
5355 if (addr && GET_CODE (addr) == POST_MODIFY)
5356 addr = XEXP (addr, 0);
5358 set = ia64_single_set (dep_insn);
5360 if ((dep_class == ITANIUM_CLASS_IALU
5361 || dep_class == ITANIUM_CLASS_ILOG
5362 || dep_class == ITANIUM_CLASS_LD)
5363 && (insn_class == ITANIUM_CLASS_LD
5364 || insn_class == ITANIUM_CLASS_ST))
5366 if (! addr || ! set)
5368 /* This isn't completely correct - an IALU that feeds an address has
5369 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5370 otherwise. Unfortunately there's no good way to describe this. */
5371 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5375 if ((dep_class == ITANIUM_CLASS_IALU
5376 || dep_class == ITANIUM_CLASS_ILOG
5377 || dep_class == ITANIUM_CLASS_LD)
5378 && (insn_class == ITANIUM_CLASS_MMMUL
5379 || insn_class == ITANIUM_CLASS_MMSHF
5380 || insn_class == ITANIUM_CLASS_MMSHFI))
5383 if (dep_class == ITANIUM_CLASS_FMAC
5384 && (insn_class == ITANIUM_CLASS_FMISC
5385 || insn_class == ITANIUM_CLASS_FCVTFX
5386 || insn_class == ITANIUM_CLASS_XMPY))
5389 if ((dep_class == ITANIUM_CLASS_FMAC
5390 || dep_class == ITANIUM_CLASS_FMISC
5391 || dep_class == ITANIUM_CLASS_FCVTFX
5392 || dep_class == ITANIUM_CLASS_XMPY)
5393 && insn_class == ITANIUM_CLASS_STF)
5396 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5397 but HP engineers say any non-MM operation. */
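/* Illustrative sketch (added commentary, not part of the original
   source; operands are hypothetical): e.g. a parallel multiply feeding
   a plain ALU add falls under this rule:

       pmpyshr2.u r14 = r15, r16, 0 ;;
       add r17 = r14, r18       // non-MM consumer: pays the long latency  */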
5398 if ((dep_class == ITANIUM_CLASS_MMMUL
5399 || dep_class == ITANIUM_CLASS_MMSHF
5400 || dep_class == ITANIUM_CLASS_MMSHFI)
5401 && insn_class != ITANIUM_CLASS_MMMUL
5402 && insn_class != ITANIUM_CLASS_MMSHF
5403 && insn_class != ITANIUM_CLASS_MMSHFI)
5409 /* Describe the current state of the Itanium pipeline. */
5412 /* The first slot that is used in the current cycle. */
5414 /* The next slot to fill. */
5416 /* The packet we have selected for the current issue window. */
5417 const struct ia64_packet *packet;
5418 /* The position of the split issue that occurs due to issue width
5419 limitations (6 if there's no split issue). */
5421 /* Record data about the insns scheduled so far in the same issue
5422 window. The elements up to but not including FIRST_SLOT belong
5423 to the previous cycle, the ones starting with FIRST_SLOT belong
5424 to the current cycle. */
5425 enum attr_type types[6];
5428 /* Nonzero if we decided to schedule a stop bit. */
5432 /* Temporary arrays; they have enough elements to hold all insns that
5433 can be ready at the same time while scheduling the current block.
5434 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5435 static rtx *sched_ready;
5436 static enum attr_type *sched_types;
5438 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT of packet P. */
5442 insn_matches_slot (p, itype, slot, insn)
5443 const struct ia64_packet *p;
5444 enum attr_type itype;
5448 enum attr_itanium_requires_unit0 u0;
5449 enum attr_type stype = p->t[slot];
5453 u0 = ia64_safe_itanium_requires_unit0 (insn);
5454 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5457 for (i = sched_data.first_slot; i < slot; i++)
5458 if (p->t[i] == stype
5459 || (stype == TYPE_F && p->t[i] == TYPE_L)
5460 || (stype == TYPE_I && p->t[i] == TYPE_X))
5463 if (GET_CODE (insn) == CALL_INSN)
5465 /* Reject calls in multiway branch packets. We want to limit
5466 the number of multiway branches we generate (since the branch
5467 predictor is limited), and this seems to work fairly well.
5468 (If we didn't do this, we'd have to add another test here to
5469 force calls into the third slot of the bundle.) */
5472 if (p->t[1] == TYPE_B)
5477 if (p->t[4] == TYPE_B)
5485 if (itype == TYPE_A)
5486 return stype == TYPE_M || stype == TYPE_I;
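/* Added commentary, not part of the original source: A-type insns,
   e.g. a simple "add", can execute on either an M or an I unit, which
   is why they match both slot types above. */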
5490 /* Like emit_insn_before, but skip cycle_display notes.
5491 ??? When cycle display notes are implemented, update this. */
5494 ia64_emit_insn_before (insn, before)
5497 emit_insn_before (insn, before);
5500 /* When rotating a bundle out of the issue window, insert a bundle selector
5501 insn in front of it. DUMP is the scheduling dump file or NULL. START
5502 is either 0 or 3, depending on whether we want to emit a bundle selector
5503 for the first bundle or the second bundle in the current issue window.
5505 The selector insns are emitted this late because the selected packet can
5506 be changed until parts of it get rotated out. */
5509 finish_last_head (dump, start)
5513 const struct ia64_packet *p = sched_data.packet;
5514 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5515 int bundle_type = b - bundle;
5519 if (! ia64_final_schedule)
5522 for (i = start; sched_data.insns[i] == 0; i++)
5525 insn = sched_data.insns[i];
5528 fprintf (dump, "// Emitting template before %d: %s\n",
5529 INSN_UID (insn), b->name);
5531 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5534 /* We can't schedule more insns this cycle. Fix up the scheduling state
5535 and advance FIRST_SLOT and CUR.
5536 We have to distribute the insns that are currently found between
5537 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5538 far, they are stored successively in the fields starting at FIRST_SLOT;
5539 now they must be moved to the correct slots.
5540 DUMP is the current scheduling dump file, or NULL. */
5543 cycle_end_fill_slots (dump)
5546 const struct ia64_packet *packet = sched_data.packet;
5548 enum attr_type tmp_types[6];
5551 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5552 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5554 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5556 enum attr_type t = tmp_types[i];
5557 if (t != ia64_safe_type (tmp_insns[i]))
5559 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5561 if (slot > sched_data.split)
5564 fprintf (dump, "// Packet needs %s, have %s\n",
5565 type_names[packet->t[slot]], type_names[t]);
5566 sched_data.types[slot] = packet->t[slot];
5567 sched_data.insns[slot] = 0;
5568 sched_data.stopbit[slot] = 0;
5570 /* ??? TYPE_L instructions always fill up two slots, but we don't
5571 support TYPE_L nops. */
5572 if (packet->t[slot] == TYPE_L)
5578 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5579 actual slot type later. */
5580 sched_data.types[slot] = packet->t[slot];
5581 sched_data.insns[slot] = tmp_insns[i];
5582 sched_data.stopbit[slot] = 0;
5585 /* TYPE_L instructions always fill up two slots. */
5588 sched_data.types[slot] = packet->t[slot];
5589 sched_data.insns[slot] = 0;
5590 sched_data.stopbit[slot] = 0;
5595 /* This isn't right - there's no need to pad out until the forced split;
5596 the CPU will automatically split if an insn isn't ready. */
5598 while (slot < sched_data.split)
5600 sched_data.types[slot] = packet->t[slot];
5601 sched_data.insns[slot] = 0;
5602 sched_data.stopbit[slot] = 0;
5607 sched_data.first_slot = sched_data.cur = slot;
5610 /* Bundle rotations, as described in the Itanium optimization manual.
5611 We can rotate either one or both bundles out of the issue window.
5612 DUMP is the current scheduling dump file, or NULL. */
5615 rotate_one_bundle (dump)
5619 fprintf (dump, "// Rotating one bundle.\n");
5621 finish_last_head (dump, 0);
5622 if (sched_data.cur > 3)
5624 sched_data.cur -= 3;
5625 sched_data.first_slot -= 3;
5626 memmove (sched_data.types,
5627 sched_data.types + 3,
5628 sched_data.cur * sizeof *sched_data.types);
5629 memmove (sched_data.stopbit,
5630 sched_data.stopbit + 3,
5631 sched_data.cur * sizeof *sched_data.stopbit);
5632 memmove (sched_data.insns,
5633 sched_data.insns + 3,
5634 sched_data.cur * sizeof *sched_data.insns);
5636 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
5641 sched_data.first_slot = 0;
5646 rotate_two_bundles (dump)
5650 fprintf (dump, "// Rotating two bundles.\n");
5652 if (sched_data.cur == 0)
5655 finish_last_head (dump, 0);
5656 if (sched_data.cur > 3)
5657 finish_last_head (dump, 3);
5659 sched_data.first_slot = 0;
5662 /* We're beginning a new block. Initialize data structures as necessary. */
5665 ia64_sched_init (dump, sched_verbose, max_ready)
5666 FILE *dump ATTRIBUTE_UNUSED;
5667 int sched_verbose ATTRIBUTE_UNUSED;
5670 static int initialized = 0;
5678 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5680 const struct bundle *t1 = bundle + b1;
5681 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5683 const struct bundle *t2 = bundle + b2;
5689 for (i = 0; i < NR_PACKETS; i++)
5692 for (j = 0; j < 3; j++)
5693 packets[i].t[j] = packets[i].t1->t[j];
5694 for (j = 0; j < 3; j++)
5695 packets[i].t[j + 3] = packets[i].t2->t[j];
5696 packets[i].first_split = itanium_split_issue (packets + i, 0);
5701 init_insn_group_barriers ();
5703 memset (&sched_data, 0, sizeof sched_data);
5704 sched_types = (enum attr_type *) xmalloc (max_ready
5705 * sizeof (enum attr_type));
5706 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5709 /* See if the packet P can match the insns we have already scheduled. Return
5710 nonzero if so. In *PSLOT, we store the first slot that is available for
5711 more instructions if we choose this packet.
5712 SPLIT holds the last slot we can use; there's a split issue after it, so
5713 scheduling beyond it would cause us to use more than one cycle. */
5716 packet_matches_p (p, split, pslot)
5717 const struct ia64_packet *p;
5721 int filled = sched_data.cur;
5722 int first = sched_data.first_slot;
5725 /* First, check if the first of the two bundles must be a specific one (due to stop bits). */
5727 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5729 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5732 for (i = 0; i < first; i++)
5733 if (! insn_matches_slot (p, sched_data.types[i], i,
5734 sched_data.insns[i]))
5736 for (i = slot = first; i < filled; i++)
5738 while (slot < split)
5740 if (insn_matches_slot (p, sched_data.types[i], slot,
5741 sched_data.insns[i]))
5755 /* A frontend for itanium_split_issue. For a packet P and a slot
5756 number FIRST that describes the start of the current clock cycle,
5757 return the slot number of the first split issue. This function
5758 uses the cached number found in P if possible. */
5761 get_split (p, first)
5762 const struct ia64_packet *p;
5766 return p->first_split;
5767 return itanium_split_issue (p, first);
5770 /* Given N_READY insns in the array READY, whose types are found in the
5771 corresponding array TYPES, return the insn that is best suited to be
5772 scheduled in slot SLOT of packet P. */
5775 find_best_insn (ready, types, n_ready, p, slot)
5777 enum attr_type *types;
5779 const struct ia64_packet *p;
5784 while (n_ready-- > 0)
5786 rtx insn = ready[n_ready];
5789 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5791 /* If we have equally good insns, one of which has a stricter
5792 slot requirement, prefer the one with the stricter requirement. */
5793 if (best >= 0 && types[n_ready] == TYPE_A)
5795 if (insn_matches_slot (p, types[n_ready], slot, insn))
5798 best_pri = INSN_PRIORITY (ready[best]);
5800 /* If there's no way we could get a stricter requirement, stop looking now. */
5802 if (types[n_ready] != TYPE_A
5803 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5811 /* Select the best packet to use given the current scheduler state and the current ready list.
5813 READY is an array holding N_READY ready insns; TYPES is a corresponding
5814 array that holds their types. Store the best packet in *PPACKET and the
5815 number of insns that can be scheduled in the current cycle in *PBEST. */
5818 find_best_packet (pbest, ppacket, ready, types, n_ready)
5820 const struct ia64_packet **ppacket;
5822 enum attr_type *types;
5825 int first = sched_data.first_slot;
5828 const struct ia64_packet *best_packet = NULL;
5831 for (i = 0; i < NR_PACKETS; i++)
5833 const struct ia64_packet *p = packets + i;
5835 int split = get_split (p, first);
5837 int first_slot, last_slot;
5840 if (! packet_matches_p (p, split, &first_slot))
5843 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5847 for (slot = first_slot; slot < split; slot++)
5851 /* Disallow a degenerate case where the first bundle doesn't
5852 contain anything but NOPs! */
5853 if (first_slot == 0 && win == 0 && slot == 3)
5859 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5862 sched_ready[insn_nr] = 0;
5866 else if (p->t[slot] == TYPE_B)
5869 /* We must disallow MBB/BBB packets if any of their B slots would be
5870 filled with nops. */
5873 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5878 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5883 || (win == best && last_slot < lowest_end))
5886 lowest_end = last_slot;
5891 *ppacket = best_packet;
5894 /* Reorder the ready list so that the insns that can be issued in this cycle
5895 are found in the correct order at the end of the list.
5896 DUMP is the scheduling dump file, or NULL. READY points to the start,
5897 E_READY to the end of the ready list. MAY_FAIL determines what should be
5898 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5899 otherwise we return 0.
5900 Return 1 if any insns can be scheduled in this cycle. */
5903 itanium_reorder (dump, ready, e_ready, may_fail)
5909 const struct ia64_packet *best_packet;
5910 int n_ready = e_ready - ready;
5911 int first = sched_data.first_slot;
5912 int i, best, best_split, filled;
5914 for (i = 0; i < n_ready; i++)
5915 sched_types[i] = ia64_safe_type (ready[i]);
5917 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5928 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5929 best_packet->t1->name,
5930 best_packet->t2 ? best_packet->t2->name : "(none)", best);
5933 best_split = itanium_split_issue (best_packet, first);
5934 packet_matches_p (best_packet, best_split, &filled);
5936 for (i = filled; i < best_split; i++)
5940 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5943 rtx insn = ready[insn_nr];
5944 memmove (ready + insn_nr, ready + insn_nr + 1,
5945 (n_ready - insn_nr - 1) * sizeof (rtx));
5946 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5947 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5948 ready[--n_ready] = insn;
5952 sched_data.packet = best_packet;
5953 sched_data.split = best_split;
5957 /* Dump information about the current scheduling state to file DUMP. */
5960 dump_current_packet (dump)
5964 fprintf (dump, "// %d slots filled:", sched_data.cur);
5965 for (i = 0; i < sched_data.first_slot; i++)
5967 rtx insn = sched_data.insns[i];
5968 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5970 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5971 if (sched_data.stopbit[i])
5972 fprintf (dump, " ;;");
5974 fprintf (dump, " :::");
5975 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5977 rtx insn = sched_data.insns[i];
5978 enum attr_type t = ia64_safe_type (insn);
5979 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5981 fprintf (dump, "\n");
5984 /* Schedule a stop bit. DUMP is the current scheduling dump file, or NULL. */
5988 schedule_stop (dump)
5991 const struct ia64_packet *best = sched_data.packet;
5996 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5998 if (sched_data.cur == 0)
6001 fprintf (dump, "// At start of bundle, so nothing to do.\n");
6003 rotate_two_bundles (NULL);
6007 for (i = -1; i < NR_PACKETS; i++)
6009 /* This is a slight hack to give the current packet the first chance.
6010 This is done to avoid e.g. switching from MIB to MBB bundles. */
6011 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6012 int split = get_split (p, sched_data.first_slot);
6013 const struct bundle *compare;
6016 if (! packet_matches_p (p, split, &next))
6019 compare = next > 3 ? p->t2 : p->t1;
6022 if (compare->possible_stop)
6023 stoppos = compare->possible_stop;
6027 if (stoppos < next || stoppos >= best_stop)
6029 if (compare->possible_stop == 0)
6031 stoppos = (next > 3 ? 6 : 3);
6033 if (stoppos < next || stoppos >= best_stop)
6037 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6038 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6041 best_stop = stoppos;
6045 sched_data.packet = best;
6046 cycle_end_fill_slots (dump);
6047 while (sched_data.cur < best_stop)
6049 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6050 sched_data.insns[sched_data.cur] = 0;
6051 sched_data.stopbit[sched_data.cur] = 0;
6054 sched_data.stopbit[sched_data.cur - 1] = 1;
6055 sched_data.first_slot = best_stop;
6058 dump_current_packet (dump);
6061 /* If necessary, perform one or two rotations on the scheduling state.
6062 This should only be called if we are starting a new cycle. */
6068 cycle_end_fill_slots (dump);
6069 if (sched_data.cur == 6)
6070 rotate_two_bundles (dump);
6071 else if (sched_data.cur >= 3)
6072 rotate_one_bundle (dump);
6073 sched_data.first_slot = sched_data.cur;
6076 /* The clock cycle when ia64_sched_reorder was last called. */
6077 static int prev_cycle;
6079 /* The first insn scheduled in the previous cycle. This is the saved
6080 value of sched_data.first_slot. */
6081 static int prev_first;
6083 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6084 pad out the delay between MM (shifts, etc.) and integer operations. */
6087 nop_cycles_until (clock_var, dump)
6091 int prev_clock = prev_cycle;
6092 int cycles_left = clock_var - prev_clock;
6093 bool did_stop = false;
6095 /* Finish the previous cycle; pad it out with NOPs. */
6096 if (sched_data.cur == 3)
6098 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6100 maybe_rotate (dump);
6102 else if (sched_data.cur > 0)
6105 int split = itanium_split_issue (sched_data.packet, prev_first);
6107 if (sched_data.cur < 3 && split > 3)
6113 if (split > sched_data.cur)
6116 for (i = sched_data.cur; i < split; i++)
6118 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6119 sched_data.types[i] = sched_data.packet->t[i];
6120 sched_data.insns[i] = t;
6121 sched_data.stopbit[i] = 0;
6123 sched_data.cur = split;
6126 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6130 for (i = sched_data.cur; i < 6; i++)
6132 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6133 sched_data.types[i] = sched_data.packet->t[i];
6134 sched_data.insns[i] = t;
6135 sched_data.stopbit[i] = 0;
6142 if (need_stop || sched_data.cur == 6)
6144 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6147 maybe_rotate (dump);
6151 while (cycles_left > 0)
6153 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6154 sched_emit_insn (gen_nop_type (TYPE_M));
6155 sched_emit_insn (gen_nop_type (TYPE_I));
6156 if (cycles_left > 1)
6158 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6161 sched_emit_insn (gen_nop_type (TYPE_I));
6162 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6168 init_insn_group_barriers ();
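/* Illustrative sketch (added commentary, not part of the original
   source): one iteration of the padding loop above comes out roughly
   as an all-NOP .mii bundle,

       .mii
       nop.m 0
       nop.i 0 ;;
       nop.i 0 ;;

   where the mid-bundle stop bit lets a single bundle burn two idle
   cycles when more than one cycle remains. */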
6171 /* We are about to begin issuing insns for this clock cycle.
6172 Override the default sort algorithm to better slot instructions. */
6175 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6176 reorder_type, clock_var)
6177 FILE *dump ATTRIBUTE_UNUSED;
6178 int sched_verbose ATTRIBUTE_UNUSED;
6181 int reorder_type, clock_var;
6184 int n_ready = *pn_ready;
6185 rtx *e_ready = ready + n_ready;
6190 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6191 dump_current_packet (dump);
6194 /* Work around the pipeline flush that will occur if the results of
6195 an MM instruction are accessed before the result is ready. Intel
6196 documentation says this only happens with IALU, ISHF, ILOG, LD,
6197 and ST consumers, but experimental evidence shows that *any* non-MM
6198 type instruction will incur the flush. */
6199 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6201 for (insnp = ready; insnp < e_ready; insnp++)
6203 rtx insn = *insnp, link;
6204 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6206 if (t == ITANIUM_CLASS_MMMUL
6207 || t == ITANIUM_CLASS_MMSHF
6208 || t == ITANIUM_CLASS_MMSHFI)
6211 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6212 if (REG_NOTE_KIND (link) == 0)
6214 rtx other = XEXP (link, 0);
6215 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6216 if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6218 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6226 prev_first = sched_data.first_slot;
6227 prev_cycle = clock_var;
6229 if (reorder_type == 0)
6230 maybe_rotate (sched_verbose ? dump : NULL);
6232 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6234 for (insnp = ready; insnp < e_ready; insnp++)
6235 if (insnp < e_ready)
6238 enum attr_type t = ia64_safe_type (insn);
6239 if (t == TYPE_UNKNOWN)
6241 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6242 || asm_noperands (PATTERN (insn)) >= 0)
6244 rtx lowest = ready[n_asms];
6245 ready[n_asms] = insn;
6251 rtx highest = ready[n_ready - 1];
6252 ready[n_ready - 1] = insn;
6254 if (ia64_final_schedule && group_barrier_needed_p (insn))
6256 schedule_stop (sched_verbose ? dump : NULL);
6257 sched_data.last_was_stop = 1;
6258 maybe_rotate (sched_verbose ? dump : NULL);
6265 if (n_asms < n_ready)
6267 /* Some normal insns to process. Skip the asms. */
6271 else if (n_ready > 0)
6273 /* Only asm insns left. */
6274 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6276 schedule_stop (sched_verbose ? dump : NULL);
6277 sched_data.last_was_stop = 1;
6278 maybe_rotate (sched_verbose ? dump : NULL);
6280 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6284 if (ia64_final_schedule)
6286 int nr_need_stop = 0;
6288 for (insnp = ready; insnp < e_ready; insnp++)
6289 if (safe_group_barrier_needed_p (*insnp))
6292 /* Schedule a stop bit if
6293 - all insns require a stop bit, or
6294 - we are starting a new cycle and _any_ insns require a stop bit.
6295 The reason for the latter is that if our schedule is accurate, then
6296 the additional stop won't decrease performance at this point (since
6297 there's a split issue at this point anyway), but it gives us more
6298 freedom when scheduling the currently ready insns. */
6299 if ((reorder_type == 0 && nr_need_stop)
6300 || (reorder_type == 1 && n_ready == nr_need_stop))
6302 schedule_stop (sched_verbose ? dump : NULL);
6303 sched_data.last_was_stop = 1;
6304 maybe_rotate (sched_verbose ? dump : NULL);
6305 if (reorder_type == 1)
6312 /* Move down everything that needs a stop bit, preserving relative order. */
6314 while (insnp-- > ready + deleted)
6315 while (insnp >= ready + deleted)
6318 if (! safe_group_barrier_needed_p (insn))
6320 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6326 if (deleted != nr_need_stop)
6331 return itanium_reorder (sched_verbose ? dump : NULL,
6332 ready, e_ready, reorder_type == 1);
6336 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6343 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6344 pn_ready, 0, clock_var);
6347 /* Like ia64_sched_reorder, but called after issuing each insn.
6348 Override the default sort algorithm to better slot instructions. */
6351 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6352 FILE *dump ATTRIBUTE_UNUSED;
6353 int sched_verbose ATTRIBUTE_UNUSED;
6358 if (sched_data.last_was_stop)
6361 /* Detect one special case and try to optimize it.
6362 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6363 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6364 if (sched_data.first_slot == 1
6365 && sched_data.stopbit[0]
6366 && ((sched_data.cur == 4
6367 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6368 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6369 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6370 || (sched_data.cur == 3
6371 && (sched_data.types[1] == TYPE_M
6372 || sched_data.types[1] == TYPE_A)
6373 && (sched_data.types[2] != TYPE_M
6374 && sched_data.types[2] != TYPE_I
6375 && sched_data.types[2] != TYPE_A))))
6379 rtx stop = sched_data.insns[1];
6381 /* Search backward for the stop bit that must be there. */
6386 stop = PREV_INSN (stop);
6387 if (GET_CODE (stop) != INSN)
6389 insn_code = recog_memoized (stop);
6391 /* Ignore .pred.rel.mutex.
6393 ??? Update this to ignore cycle display notes too
6394 ??? once those are implemented */
6395 if (insn_code == CODE_FOR_pred_rel_mutex
6396 || insn_code == CODE_FOR_prologue_use)
6399 if (insn_code == CODE_FOR_insn_group_barrier)
6404 /* Adjust the stop bit's slot selector. */
6405 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6407 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6409 sched_data.stopbit[0] = 0;
6410 sched_data.stopbit[2] = 1;
6412 sched_data.types[5] = sched_data.types[3];
6413 sched_data.types[4] = sched_data.types[2];
6414 sched_data.types[3] = sched_data.types[1];
6415 sched_data.insns[5] = sched_data.insns[3];
6416 sched_data.insns[4] = sched_data.insns[2];
6417 sched_data.insns[3] = sched_data.insns[1];
6418 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6419 sched_data.cur += 2;
6420 sched_data.first_slot = 3;
6421 for (i = 0; i < NR_PACKETS; i++)
6423 const struct ia64_packet *p = packets + i;
6424 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6426 sched_data.packet = p;
6430 rotate_one_bundle (sched_verbose ? dump : NULL);
6433 for (i = 0; i < NR_PACKETS; i++)
6435 const struct ia64_packet *p = packets + i;
6436 int split = get_split (p, sched_data.first_slot);
6439 /* Disallow multiway branches here. */
6440 if (p->t[1] == TYPE_B)
6443 if (packet_matches_p (p, split, &next) && next < best)
6446 sched_data.packet = p;
6447 sched_data.split = split;
6456 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6461 /* Did we schedule a stop? If so, finish this cycle. */
6462 if (sched_data.cur == sched_data.first_slot)
6467 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6469 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6471 dump_current_packet (dump);
6475 /* We are about to issue INSN. Return the number of insns left on the
6476 ready queue that can be issued this cycle. */
6479 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6483 int can_issue_more ATTRIBUTE_UNUSED;
6485 enum attr_type t = ia64_safe_type (insn);
6487 if (sched_data.last_was_stop)
6489 int t = sched_data.first_slot;
6492 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6493 init_insn_group_barriers ();
6494 sched_data.last_was_stop = 0;
6497 if (t == TYPE_UNKNOWN)
6500 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6501 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6502 || asm_noperands (PATTERN (insn)) >= 0)
6504 /* This must be some kind of asm. Clear the scheduling state. */
6505 rotate_two_bundles (sched_verbose ? dump : NULL);
6506 if (ia64_final_schedule)
6507 group_barrier_needed_p (insn);
6512 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6513 important state info. Don't delete this test. */
6514 if (ia64_final_schedule
6515 && group_barrier_needed_p (insn))
6518 sched_data.stopbit[sched_data.cur] = 0;
6519 sched_data.insns[sched_data.cur] = insn;
6520 sched_data.types[sched_data.cur] = t;
6524 fprintf (dump, "// Scheduling insn %d of type %s\n",
6525 INSN_UID (insn), type_names[t]);
6527 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6529 schedule_stop (sched_verbose ? dump : NULL);
6530 sched_data.last_was_stop = 1;
6536 /* Free data allocated by ia64_sched_init. */
6539 ia64_sched_finish (dump, sched_verbose)
6544 fprintf (dump, "// Finishing schedule.\n");
6545 rotate_two_bundles (NULL);
6550 /* Emit pseudo-ops for the assembler to describe predicate relations.
6551 At present this assumes that we only consider predicate pairs to
6552 be mutex, and that the assembler can deduce proper values from
6553 straight-line code. */
6556 emit_predicate_relation_info ()
6560 for (i = n_basic_blocks - 1; i >= 0; --i)
6562 basic_block bb = BASIC_BLOCK (i);
6564 rtx head = bb->head;
6566 /* We only need such notes at code labels. */
6567 if (GET_CODE (head) != CODE_LABEL)
6569 if (GET_CODE (NEXT_INSN (head)) == NOTE
6570 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6571 head = NEXT_INSN (head);
6573 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6574 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6576 rtx p = gen_rtx_REG (BImode, r);
6577 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6578 if (head == bb->end)
6584 /* Look for conditional calls that do not return, and protect predicate
6585 relations around them. Otherwise the assembler will assume the call
6586 returns, and complain about uses of call-clobbered predicates after the call. */
6588 for (i = n_basic_blocks - 1; i >= 0; --i)
6590 basic_block bb = BASIC_BLOCK (i);
6591 rtx insn = bb->head;
6595 if (GET_CODE (insn) == CALL_INSN
6596 && GET_CODE (PATTERN (insn)) == COND_EXEC
6597 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6599 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6600 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6601 if (bb->head == insn)
6603 if (bb->end == insn)
6607 if (insn == bb->end)
6609 insn = NEXT_INSN (insn);
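/* Illustrative sketch (added commentary, not part of the original
   source; the predicate pair shown is hypothetical): the pseudo-op
   emitted at such a label looks like

       .pred.rel.mutex p6, p7

   telling the assembler that the two predicates cannot both be true. */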
6614 /* Generate a NOP instruction of type T. We will never generate L type nops. */
6624 return gen_nop_m ();
6626 return gen_nop_i ();
6628 return gen_nop_b ();
6630 return gen_nop_f ();
6632 return gen_nop_x ();
6638 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6639 here than while scheduling. */
6645 const struct bundle *b = 0;
6648 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6652 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6653 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6655 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
6656 || GET_CODE (insn) == CODE_LABEL)
6659 while (bundle_pos < 3)
6661 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6664 if (GET_CODE (insn) != CODE_LABEL)
6665 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6671 else if (GET_CODE (pat) == UNSPEC_VOLATILE
6672 && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
6674 int t = INTVAL (XVECEXP (pat, 0, 0));
6676 while (bundle_pos < t)
6678 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6684 if (bundle_pos == 3)
6687 if (b && INSN_P (insn))
6689 t = ia64_safe_type (insn);
6690 if (asm_noperands (PATTERN (insn)) >= 0
6691 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6693 while (bundle_pos < 3)
6695 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6701 if (t == TYPE_UNKNOWN)
6703 while (bundle_pos < 3)
6705 if (t == b->t[bundle_pos]
6706 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6707 || b->t[bundle_pos] == TYPE_I)))
6710 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6719 /* Perform machine dependent operations on the rtl chain INSNS. */
6725 /* We are freeing block_for_insn in the toplev to keep compatibility
6726 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6727 compute_bb_for_insn (get_max_uid ());
6729 /* If optimizing, we'll have split before scheduling. */
6731 split_all_insns (0);
6733 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
6736 if (ia64_flag_schedule_insns2)
6738 timevar_push (TV_SCHED2);
6739 ia64_final_schedule = 1;
6740 schedule_ebbs (rtl_dump_file);
6741 ia64_final_schedule = 0;
6742 timevar_pop (TV_SCHED2);
6744 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6745 place as they were during scheduling. */
6746 emit_insn_group_barriers (rtl_dump_file, insns);
6750 emit_all_insn_group_barriers (rtl_dump_file, insns);
6752 /* A call must not be the last instruction in a function; otherwise the
6753 return address would not be within the function, and unwinding would
6754 not work properly. Note that IA-64 differs from dwarf2 on this point. */
6755 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6760 insn = get_last_insn ();
6761 if (! INSN_P (insn))
6762 insn = prev_active_insn (insn);
6763 if (GET_CODE (insn) == INSN
6764 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6765 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6768 insn = prev_active_insn (insn);
6770 if (GET_CODE (insn) == CALL_INSN)
6773 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6774 emit_insn (gen_break_f ());
6775 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6780 emit_predicate_relation_info ();
6783 /* Return true if REGNO is used by the epilogue. */
6786 ia64_epilogue_uses (regno)
6792 /* When a function makes a call through a function descriptor, we
6793 will write a (potentially) new value to "gp". After returning
6794 from such a call, we need to make sure the function restores the
6795 original gp-value, even if the function itself does not use the gp anyway. */
6797 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
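/* Illustrative sketch (added commentary, not part of the original
   source; registers are hypothetical): a call through a function
   descriptor loads the entry point and the callee's gp, roughly

       ld8 r14 = [r32], 8 ;;    // entry point from the descriptor
       ld8 gp = [r32]           // callee's gp
       mov b6 = r14 ;;
       br.call.sptk.many b0 = b6 ;;
       mov gp = r4              // restore our own gp afterwards

   which is why gp must be treated as used by the epilogue here. */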
6799 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
6800 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
6801 /* For functions defined with the syscall_linkage attribute, all
6802 input registers are marked as live at all function exits. This
6803 prevents the register allocator from using the input registers,
6804 which in turn makes it possible to restart a system call after
6805 an interrupt without having to save/restore the input registers.
6806 This also prevents kernel data from leaking to application code. */
6807 return lookup_attribute ("syscall_linkage",
6808 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
6811 /* Conditional return patterns can't represent the use of `b0' as
6812 the return address, so we force the value live this way. */
6816 /* Likewise for ar.pfs, which is used by br.ret. */
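/* Usage sketch (hypothetical declaration, not from this file): the
   attribute tested in the IN_REG cases above is attached to system-call
   stubs along the lines of

     extern long sys_read (long, void *, long)
       __attribute__ ((syscall_linkage));

   which keeps in0-in7 live at every exit of such a function.  */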
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (regno)
     int regno;
{
  if (! reload_completed)
    return 0;

  if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
    return 1;
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
    return 1;
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
    return 1;
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
    return 1;
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
    return 1;

  return 0;
}
/* For ia64, SYMBOL_REF_FLAG set means that it is a function.

   We add @ to the name if this goes in small data/bss.  We can only put
   a variable in small data/bss if it is defined in this module or a module
   that we are statically linked with.  We can't check the second condition,
   but TREE_STATIC gives us the first one.  */

/* ??? If we had IPA, we could check the second condition.  We could support
   programmer added section attributes if the variable is not defined in this
   module.  */

/* ??? See the v850 port for a cleaner way to do this.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */
static bool
ia64_in_small_data_p (exp)
     tree exp;
{
  if (TARGET_NO_SDATA)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
          || strcmp (section, ".sbss") == 0)
        return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
        return true;
    }

  return false;
}
static void
ia64_encode_section_info (decl, first)
     tree decl;
     int first ATTRIBUTE_UNUSED;
{
  const char *symbol_str;
  bool is_local, is_small;
  rtx symbol;

  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
      return;
    }

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) != VAR_DECL
      || GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  symbol = XEXP (DECL_RTL (decl), 0);
  symbol_str = XSTR (symbol, 0);

  /* A variable is considered "local" if it is defined by this module.  */
  if (MODULE_LOCAL_P (decl))
    is_local = true;
  /* Otherwise, variables defined outside this object may not be local.  */
  else if (DECL_EXTERNAL (decl))
    is_local = false;
  /* Linkonce and weak data are never local.  */
  else if (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
    is_local = false;
  /* Static variables are always local.  */
  else if (! TREE_PUBLIC (decl))
    is_local = true;
  /* If PIC, then assume that any global name can be overridden by
     symbols resolved from other modules.  */
  else if (flag_pic)
    is_local = false;
  /* Uninitialized COMMON variable may be unified with symbols
     resolved from other modules.  */
  else if (DECL_COMMON (decl)
           && (DECL_INITIAL (decl) == NULL
               || DECL_INITIAL (decl) == error_mark_node))
    is_local = false;
  /* Otherwise we're left with initialized (or non-common) global data
     which is of necessity defined locally.  */
  else
    is_local = true;

  /* Determine if DECL will wind up in .sdata/.sbss.  */
  is_small = ia64_in_small_data_p (decl);

  /* Finally, encode this into the symbol string.  */
  if (is_local && is_small)
    {
      char *newstr;
      size_t len;

      if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
        return;

      len = strlen (symbol_str) + 1;
      newstr = alloca (len + 1);
      newstr[0] = SDATA_NAME_FLAG_CHAR;
      memcpy (newstr + 1, symbol_str, len);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len);
    }

  /* This decl is marked as being in small data/bss but it shouldn't be;
     one likely explanation for this is that the decl has been moved into
     a different section from the one it was in when ENCODE_SECTION_INFO
     was first called.  Remove the '@'.  */
  else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
    XSTR (symbol, 0) = ggc_strdup (symbol_str + 1);
}
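/* Observable effect (a sketch, assuming SDATA_NAME_FLAG_CHAR is '@' as
   the comment above describes): a module-local `int x;' below the sdata
   threshold has its SYMBOL_REF string rewritten from "x" to "@x", which
   later phases recognize and address gp-relative with addl,ld8 instead
   of the movl/ld8 sequence needed for ordinary data.  */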
/* Output assembly directives for prologue regions.  */

/* The current basic block number.  */

static int block_num;

/* True if we need a copy_state command at the start of the next block.  */

static int need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue ()
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (block_num != n_basic_blocks - 1)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = 1;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}
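/* For a non-final epilogue block this emits

       .label_state 1
       .restore sp

   and process_for_unwind_directive below pairs it with ".body" and
   ".copy_state 1" at the start of the next block.  */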
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
         shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
        abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
               ia64_dbx_register_number (dest_regno));
      return 1;
    }
  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
        {
          rtx op0 = XEXP (src, 0);
          rtx op1 = XEXP (src, 1);
          if (op0 == dest && GET_CODE (op1) == CONST_INT)
            {
              if (INTVAL (op1) < 0)
                {
                  fputs ("\t.fframe ", asm_out_file);
                  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
                           -INTVAL (op1));
                  fputc ('\n', asm_out_file);
                }
              else
                process_epilogue ();
            }
          else
            abort ();
        }
      else if (GET_CODE (src) == REG
               && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
        process_epilogue ();
      else
        abort ();

      return 1;
    }
  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
        {
        case BR_REG (0):
          /* Saving return address pointer.  */
          if (dest_regno != current_frame_info.reg_save_b0)
            abort ();
          fprintf (asm_out_file, "\t.save rp, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case PR_REG (0):
          if (dest_regno != current_frame_info.reg_save_pr)
            abort ();
          fprintf (asm_out_file, "\t.save pr, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case AR_UNAT_REGNUM:
          if (dest_regno != current_frame_info.reg_save_ar_unat)
            abort ();
          fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case AR_LC_REGNUM:
          if (dest_regno != current_frame_info.reg_save_ar_lc)
            abort ();
          fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case STACK_POINTER_REGNUM:
          if (dest_regno != HARD_FRAME_POINTER_REGNUM
              || ! frame_pointer_needed)
            abort ();
          fprintf (asm_out_file, "\t.vframe r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        default:
          /* Everything else should indicate being stored to memory.  */
          abort ();
        }
    }
  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      rtx base;
      const char *saveop;
      HOST_WIDE_INT off;

      if (GET_CODE (XEXP (dest, 0)) == REG)
        {
          base = XEXP (dest, 0);
          off = 0;
        }
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
        {
          base = XEXP (XEXP (dest, 0), 0);
          off = INTVAL (XEXP (XEXP (dest, 0), 1));
        }
      else
        abort ();

      if (base == hard_frame_pointer_rtx)
        {
          saveop = ".savepsp";
          off = - off;
        }
      else if (base == stack_pointer_rtx)
        saveop = ".savesp";
      else
        abort ();

      src_regno = REGNO (src);
      switch (src_regno)
        {
        case BR_REG (0):
          if (current_frame_info.reg_save_b0 != 0)
            abort ();
          fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
          return 1;
        case PR_REG (0):
          if (current_frame_info.reg_save_pr != 0)
            abort ();
          fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
          return 1;
        case AR_LC_REGNUM:
          if (current_frame_info.reg_save_ar_lc != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
          return 1;
        case AR_PFS_REGNUM:
          if (current_frame_info.reg_save_ar_pfs != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
          return 1;
        case AR_UNAT_REGNUM:
          if (current_frame_info.reg_save_ar_unat != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
          return 1;
        case GR_REG (4): case GR_REG (5): case GR_REG (6): case GR_REG (7):
          fprintf (asm_out_file, "\t.save.g 0x%x\n",
                   1 << (src_regno - GR_REG (4)));
          return 1;
        case BR_REG (1): case BR_REG (2): case BR_REG (3):
        case BR_REG (4): case BR_REG (5):
          fprintf (asm_out_file, "\t.save.b 0x%x\n",
                   1 << (src_regno - BR_REG (1)));
          return 1;
        case FR_REG (2): case FR_REG (3): case FR_REG (4): case FR_REG (5):
          fprintf (asm_out_file, "\t.save.f 0x%x\n",
                   1 << (src_regno - FR_REG (2)));
          return 1;
        case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
        case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
        case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
        case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
          fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
                   1 << (src_regno - FR_REG (12)));
          return 1;
        default:
          return 0;
        }
    }

  return 0;
}
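/* Taken together, process_set annotates a typical frame-related prologue
   with a directive sequence along the lines of

       .fframe 16
       .save ar.pfs, r34
       .save rp, r33

   (register numbers illustrative; the real ones come from
   current_frame_info).  */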
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (asm_out_file, insn)
     FILE *asm_out_file;
     rtx insn;
{
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx pat;

      if (GET_CODE (insn) == NOTE
          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
        {
          block_num = NOTE_BASIC_BLOCK (insn)->index;

          /* Restore unwind state from immediately before the epilogue.  */
          if (need_copy_state)
            {
              fprintf (asm_out_file, "\t.body\n");
              fprintf (asm_out_file, "\t.copy_state 1\n");
              need_copy_state = 0;
            }
        }

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
        return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
        pat = XEXP (pat, 0);
      else
        pat = PATTERN (insn);

      switch (GET_CODE (pat))
        {
        case SET:
          process_set (asm_out_file, pat);
          break;

        case PARALLEL:
          {
            int par_index;
            int limit = XVECLEN (pat, 0);
            for (par_index = 0; par_index < limit; par_index++)
              {
                rtx x = XVECEXP (pat, 0, par_index);
                if (GET_CODE (x) == SET)
                  process_set (asm_out_file, x);
              }
            break;
          }

        default:
          abort ();
        }
    }
}
void
ia64_init_builtins ()
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);
  tree endlink = void_list_node;

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, psi_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type (long_integer_type_node,
                           tree_cons (NULL_TREE, pdi_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            long_integer_type_node,
                                                            endlink))));

  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, psi_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type (long_integer_type_node,
                           tree_cons (NULL_TREE, pdi_type_node,
                                      tree_cons (NULL_TREE, long_integer_type_node,
                                                 endlink)));

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
                                                      endlink));

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
                                                      endlink));

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
               IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
               IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
               IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
               IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
               IA64_BUILTIN_LOCK_RELEASE_DI);
  def_builtin ("__builtin_ia64_bsp",
               build_function_type (ptr_type_node, endlink),
               IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
               build_function_type (void_type_node, endlink),
               IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_NAND_AND_FETCH_DI);

#undef def_builtin
}
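/* Usage sketch (hypothetical caller, not part of this file): with the
   definitions above in place, a simple spin lock can be written as

     static int lock;

     void acquire (void)
     {
       while (__sync_lock_test_and_set_si (&lock, 1) != 0)
         continue;
     }

     void release (void)
     {
       __sync_lock_release_si (&lock);
     }

   acquire loops until the old value returned by the exchange is 0.  */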
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
        insn = gen_fetchadd_acq_si (ret, mem, value);
      else
        insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}
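/* For example, __sync_fetch_and_add_si (p, 1) takes the fetchadd fast
   path above, since 1 is one of the immediates fetchadd accepts, while
   __sync_fetch_and_add_si (p, 3) falls through to the ar.ccv/cmpxchg
   retry loop (the accepted immediate set -16,-8,-4,-1,1,4,8,16 is our
   reading of fetchadd_operand, which is defined elsewhere).  */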
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
        target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
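/* Usage sketch (hypothetical caller): both

     int old = __sync_val_compare_and_swap_si (&x, 0, 1);
     int ok  = __sync_bool_compare_and_swap_si (&x, 0, 1);

   expand through this routine; boolp merely selects the trailing
   emit_store_flag_force comparison of the loaded value against OLD.  */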
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
/* On HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  /* Exception to normal case for structures/unions/etc.  */
  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
     hardwired to be true.  */
  return ((mode == BLKmode
           ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
              && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
           : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
          ? downward : upward);
}
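/* Sketch of the effect: a 3-byte struct argument takes the
   AGGREGATE_TYPE_P early return above and is padded `upward' rather
   than falling through to the standard rule, which would have chosen
   `downward' for such a small BLKmode argument.  */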
/* It is illegal to have relocations in shared segments on AIX.
   Pretend flag_pic is always set.  */

static void
ia64_aix_select_section (exp, reloc, align)
     tree exp;
     int reloc;
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  default_elf_select_section (exp, reloc, align);
  flag_pic = save_pic;
}

static void
ia64_aix_unique_section (decl, reloc)
     tree decl;
     int reloc;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  default_unique_section (decl, reloc);
  flag_pic = save_pic;
}